{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "fe8ec9ca",
   "metadata": {},
   "source": [
    "## 机器翻译数据集"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8216c071",
   "metadata": {},
   "source": [
    "### 数据集引入"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a006bcdd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "# Read the raw TSV export unchanged and preview the first rows to inspect its layout.\n",
    "with open('data/有英语-中文普通话对应句 - 2023-02-18.tsv', encoding='utf-8') as f:\n",
    "    lines = list(f)  # one string per line, trailing newline kept\n",
    "print(lines[:5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e045bb46",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the two sentence columns (fields 1 and 3 of each row), re-joined with a tab.\n",
    "data = []\n",
    "with open('data/有英语-中文普通话对应句 - 2023-02-18.tsv', encoding='utf-8') as f:\n",
    "    for line in f:\n",
    "        fields = line.strip().split('\\t')\n",
    "        if len(fields) < 4:\n",
    "            continue  # blank or truncated row: skip it instead of raising IndexError\n",
    "        data.append(fields[1] + '\\t' + fields[3])\n",
    "print(data[:5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eed4547a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 找出特殊字符\n",
    "import re\n",
    "import string\n",
    "\n",
    "# Blank out CJK ideographs, then list every remaining character that is not an ASCII letter or digit.\n",
    "content = ''.join(data)\n",
    "special_char = re.sub(r'[\\u4e00-\\u9fa5]', ' ', content)\n",
    "print(set(special_char).difference(string.ascii_letters, string.digits))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a139889b",
   "metadata": {},
   "source": [
    "### 数据清洗"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10c49036",
   "metadata": {},
   "outputs": [],
   "source": [
    "def cleaning(data):\n",
    "    for i in range(len(data)):\n",
    "        # 替换特殊字符\n",
    "        data[i] = data[i].replace('\\u200b', '')\n",
    "        data[i] = data[i].replace('\\u200f', '')\n",
    "        data[i] = data[i].replace('\\xad', '')\n",
    "        data[i] = data[i].replace('\\u3000', ' ')\n",
    "        eng_mark = [',', '.', '!', '?'] # 因为标点前加空格\n",
    "        for mark in eng_mark:\n",
    "            data[i] = data[i].replace(mark, ' '+mark)\n",
    "        data[i] = data[i].lower()  # 统一替换为小写\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8400694a",
   "metadata": {},
   "outputs": [],
   "source": [
    "cleaning(data)  # normalises `data` in place\n",
    "data[:5]  # preview a few rows rather than echoing the entire corpus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79287a7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def tokenize(data):\n",
    "    # 分别存储源语言和目标语言的词元\n",
    "    src_tokens, tgt_tokens = [], []\n",
    "    for line in data:\n",
    "        pair = line.split('\\t')\n",
    "        src = pair[0].split(' ')\n",
    "        tgt = list(pair[1])\n",
    "        src_tokens.append(src)\n",
    "        tgt_tokens.append(tgt)\n",
    "    return src_tokens, tgt_tokens\n",
    "\n",
    "# Tokenise the corpus and preview the first six sentence pairs.\n",
    "src_tokens, tgt_tokens = tokenize(data)\n",
    "print(\"src:\", src_tokens[:6])\n",
    "print(\"tgt:\", tgt_tokens[:6])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ed90e8c6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def statistics(tokens):\n",
    "    max_len = 80 #只统计长度80以下的\n",
    "    len_list = range(max_len)  # 长度值\n",
    "    freq_list = np.zeros(max_len)  # 频率值\n",
    "    for token in tokens:\n",
    "        if len(token) < max_len:\n",
    "            freq_list[len_list.index(len(token))] += 1\n",
    "    return len_list, freq_list\n",
    "\n",
    "# Length histograms: (s1, s2) for the source side, (t1, t2) for the target side.\n",
    "s1, s2 = statistics(src_tokens)\n",
    "t1, t2 = statistics(tgt_tokens)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a80754f7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x7fc43528fd90>]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD6CAYAAABDPiuvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8+yak3AAAACXBIWXMAAAsTAAALEwEAmpwYAAAvsUlEQVR4nO3de3ic9X3n/fd3RmfJOtnCtiQbGzAY43A0BpI0TaEBJyWY7aZd0rRhs+xDN2WfNk2fJvB0n2abNtfVXNstTbYbsjSQkDQLYZM00DSBEEKbtuFgcwrYBnwCLFu2ZVvnw0gz833+uO+Rx7JGc5bG9ud1Xbpm5nffc893RvJ8/TubuyMiIme2yEIHICIiC0/JQERElAxERETJQEREUDIQERGUDEREhBySgZndb2aHzezVtLL/ZmavmdnPzezvzKw17dhdZrbLzF43sxvSyjeFZbvM7M608tVm9mxY/i0zqynh+xMRkRxYtnkGZvYeYAT4uruvD8uuB37i7nEz+zyAu3/azNYBDwIbgU7gx8D54aXeAN4H9ABbgA+7+3Yzexj4rrs/ZGZfBl5293uyBb5kyRJftWpV3m9YRORM9vzzzx9x946Z5VXZnujuPzWzVTPKfpT28BngQ+H9zcBD7h4D9prZLoLEALDL3fcAmNlDwGYz2wFcC/xGeM4DwH8FsiaDVatWsXXr1myniYhIGjN7a7byUvQZ/Afgh+H9LmBf2rGesCxT+WJgwN3jM8pFRGQeFZUMzOyPgDjwzdKEk/X1bjezrWa2ta+vbz5eUkTkjFBwMjCzfw/cCHzEj3c87AdWpJ3WHZZlKj8KtJpZ1YzyWbn7ve6+wd03dHSc1OQlIiIFKigZmNkm4FPATe4+lnboUeAWM6s1s9XAGuA5gg7jNeHIoRrgFuDRMIk8xfE+h1uBRwp7KyIiUqhchpY+CDwNXGBmPWZ2G/DXwCLgCTN7KRwFhLtvAx4GtgOPAXe4eyLsE/jPwOPADuDh8FyATwOfDDubFwP3lfQdiohIVlmHllaqDRs2uEYTiYjkx8yed/cNM8s1A1lERJQMyulfdh5h56HhhQ5DRCQrJYMySSSdj//t8/z5D19b6FBERLJSMiiTHb1DDMfibO8dWuhQRESyUjIok+f2HgOgd3CCY6OTCxyNiMjclAzKJJUMIKgliIhUMiWDMnB3trx5jF+6IJglve3A4AJHJCIyNyWDMtjdN8rR0Uk2rV/G8pY6th9QzUBEKpuSQRmkmoiuXNXOuuXN6kQWkYqnZFAGz+09ypKmWlYvaWRdZzO7+0aZmEosdFgiIhkpGZTBljf7uWp1O2bGRZ3NJJLO6wc1+UxEKpeSQYn19I+xf2CcjavbAVi3vAVATUUiUtGUDEos1V+QSgbdbfUsqq1SJ7KIVDQlgxJ7bu8xmuuquGDpIgAiEePC5c0aXioiFU3JoMSe23uMK1e1E4nYdNm6zmZeOzhMInlqLhcuIqc/JYMS6huOsefI6HQTUcq6zmbGJhO8dXR0gSITEZmbkkEJbXnzxP6ClHXLmwF1IotI5VIyKKHn9h6jvjrK+q6WE8rPX7qI6qixTZ3IIlKhlAxKaPuBIdZ3NVMdPfFjramKcN5ZizSiSEQqlpJBCR0YHKertX7WY1qWQkQqmZJBiSSTzqGhCZZnSgadzfQNxzg8PDHPkYmIZKdkUCJHRmJMJZzOlrpZj1/UGXYiq6lIRCqQkkGJ7B8YB2B5y+w1gwvDEUWvaY0iEalASgYl0jsYNP8sb529ZtBSX01NNMLA2NR8hiUikhMlgxI5ENYMMnUgAzTURhmNxecrJBGRnGVNBmZ2v5kdNrNX08razewJM9sZ3raF5WZmXzSzXWb2czO7PO05t4bn7zSzW9PKrzCzV8LnfNHMjFNQ7+AE9dVRWuqrM57TWFOlZCAiFSmXmsHXgE0zyu4EnnT3NcCT4WOA9wNrwp/bgXsgSB7AZ4CrgI3AZ1IJ
JDzn/0p73szXOiX0Do6zvLWOuXJZU20VI0oGIlKBsiYDd/8pcGxG8WbggfD+A8DNaeVf98AzQKuZLQduAJ5w92Pu3g88AWwKjzW7+zPu7sDX0651SjkwMEFnhs7jlMbaKGOT2vFMRCpPoX0GS929N7x/EFga3u8C9qWd1xOWzVXeM0v5Kad3cJzlGYaVpjSqZiAiFaroDuTwf/Tzsjazmd1uZlvNbGtfX998vGROJuNJDg/HMk44S2mqVZ+BiFSmQpPBobCJh/D2cFi+H1iRdl53WDZXefcs5bNy93vdfYO7b+jo6Cgw9NI7NDSBOxknnKU0KhmISIUqNBk8CqRGBN0KPJJW/tFwVNHVwGDYnPQ4cL2ZtYUdx9cDj4fHhszs6nAU0UfTrnXKOD7HIEufQU1UzUQiUpGqsp1gZg8C7wWWmFkPwaigPwceNrPbgLeAXw9P/wHwAWAXMAZ8DMDdj5nZnwJbwvM+6+6pTunfIRixVA/8MPw5pfQOpuYY5FAzmEzg7nOOOhIRmW9Zk4G7fzjDoetmOdeBOzJc537g/lnKtwLrs8VRyQ4MhDWDrKOJqkgknVg8SV11dD5CExHJiWYgl0Dv4DjNdVU01s6dW5vC4+o3EJFKo2RQAgcGJujM0l8ATCeL0ZjmGohIZVEyKIEDA9nnGAA01QZNQ+pEFpFKo2RQAsFSFNlrBg01Yc1gUslARCqLkkGRxicT9I9NZZ1jAMebiVQzEJFKo2RQpNSw0lz6DNSBLCKVSsmgSNMTzrIMK4VgoTpQMhCRyqNkUKTUpjadWSacQXrNQKOJRKSyKBkUKVUzWJZHn4FqBiJSaZQMinRgYJwlTTXUVmWfUVwdjVBTFWFEo4lEpMIoGRTpwOBETv0FKY012gdZRCqPkkGRenOccJYSLGOtPgMRqSxKBkXqHcxtKYoU7YMsIpVIyaAIQxNTjMTiOY0kSmmsrWJMfQYiUmGUDIrQm+PS1emCfZDVTCQilUXJoAgHBnOfY5DSVKsOZBGpPEoGRUhNOMtvNJH2QRaRyqNkUITegQkiBmctqs35OY3qQBaRCqRkUISjo5O0N9ZQFc39Y2wMm4mCHUJFRCqDkkERRmPxrFtdztRYW0XSYWIqWaaoRETyp2RQhNFYnMaa/JLB9GJ1Gl4qIhVEyaAII7H49Jd7rlLJQ53IIlJJlAyKMDoZn96jIFfa7UxEKpGSQRFGY4kC+gxSG9xo4pmIVA4lgyIU1EykPQ1EpAIpGRShkNFETWomEpEKVFQyMLPfN7NtZvaqmT1oZnVmttrMnjWzXWb2LTOrCc+tDR/vCo+vSrvOXWH562Z2Q5HvaV4kk87YZCHNRKoZiEjlKTgZmFkX8LvABndfD0SBW4DPA3e7+3lAP3Bb+JTbgP6w/O7wPMxsXfi8i4BNwJfMLL9e2QWQGhralGcHclNqNNGk+gxEpHIU20xUBdSbWRXQAPQC1wLfDo8/ANwc3t8cPiY8fp2ZWVj+kLvH3H0vsAvYWGRcZZfqAM6rZrDvORqPvUKEpGoGIlJR8mvjSOPu+83sL4C3gXHgR8DzwIC7p77peoCu8H4XsC98btzMBoHFYfkzaZdOf84JzOx24HaAlStXFhp6SaTa/HPuQB4fgPs3UeUJXqpt4NArl0PjB+HK2yBaXb5ARURyUEwzURvB/+pXA51AI0EzT9m4+73uvsHdN3R0dJTzpbJK/c8+5xnI+58HT8Av/AE/jryTtrE34bFPw3P3li9IEZEcFdNM9MvAXnfvc/cp4LvAu4DWsNkIoBvYH97fD6wACI+3AEfTy2d5TsWaTga51gx6tgIG7/597q6/gz8752+h6wp46cHyBSkikqNiksHbwNVm1hC2/V8HbAeeAj4UnnMr8Eh4/9HwMeHxn3iwdOejwC3haKPVwBrguSLimhd5NxP1PAdnrYPaRTTWhLudXfJhOPQKHHyljJGKiGRXcDJw92cJOoJfAF4Jr3Uv8Gngk2a2i6BP4L7wKfcB
i8PyTwJ3htfZBjxMkEgeA+5w94ofapMaTZTTchTJZFAz6N4ABAlkbDIO6/8tRKpVOxCRBVdwBzKAu38G+MyM4j3MMhrI3SeAX8twnc8BnysmlvmW2sc4p5rBsd0wMQDdVwJB09LA2CQ0tMP5N8ArD8P7PgvRon4dIiIF0wzkAuXVZ9CzJbgNk0FT+m5nl/4GjPbB7ifLEaaISE6UDAo0GotjBg01OTQT9WyB2hZYcj6Q2u0sbAk7733QsBheVlORiCwcJYMCjYQb2wR951n0bIHuKyASfNwNNVXHJ51V1cD6D8FrP4Dx/jJGLCKSmZJBgYJF6nKoFcRG4NC26SYiCJqJRifT9kG+9MOQiMG275UnWBGRLJQMCjSa6yJ1B14ET56QDFL7II9PhU1Fyy+FjrVqKhKRBaNkUKDRXPcySHUed10xXdQ0c4Mbs2DOwb5nof+tUocqIpKVkkGBRsM+g6x6tsLi84JhpKFZl7E+5xeD296XSxmmiEhOlAwKNJLLlpfuYefxlScUz7oP8pILAIO+10scqYhIdkoGBQqaibJ0IA+8BaOHp2cep6RqFCfUDGoaoHUl9O0odagiIlkpGRQopy0ve7YGt90nTshOjUJKLWkxrWOtagYisiCUDAo0kksHcs8WqG4IFqhLc3wf5BlLMHVcAEd2QkIb34jI/FIyKEA8kSQWT+ZQM9gCnZeftOZQ6nljM3c7O+vCYL5B/5sljFZEJDslgwLkvOXlkV3BF/wMs3YgQ1AzAOh7regYRUTyoWRQgJHJ1F4Gc3QgTwxCbBBaV5x0qLFmxjyDlHDtIiUDEZlvSgYFyGnF0sFws7aW7pMOVUUj1FVHTu5Arl0ELSvUiSwi807JoAAjOSWDnuC25eSaARDudjZLR3HHWg0vFZF5p2RQgNFctrwc3BfczlIzgCCRjM6aDMIRRcmK3+xNRE4jSgYFmG4mmms5isEeiFRB09JZD2dOBmshPhFMWBMRmSdKBgXIacvLwR5o7oTI7J3MTekb3KRLjT5Sv4GIzCMlgwIc70CeYzTRYE/G/oLguVUndyDD8RFFh9VvICLzR8mgADl3IGfoL0g9d9YO5LpmaO5SzUBE5pWSQQFGY3GqIkZtVYaPL5mAof1zJ4Oa6Ox9BhB0ImuugYjMIyWDAqQWqcu4//HwQfBE1prBrH0GAB0XwpE3IJksQbQiItkpGRRgJJbI3nkMc/YZnLQPcrqOC2BqDAbfLjJSEZHcKBkUIKgZzNV5PPccAwhqBu4wNjlL7aBjbXCrfgMRmSdFJQMzazWzb5vZa2a2w8yuMbN2M3vCzHaGt23huWZmXzSzXWb2czO7PO06t4bn7zSzW4t9U+U2OpllL4NUzaC5K+Mp01tfzjaiSAvWicg8K7Zm8AXgMXdfC1wC7ADuBJ509zXAk+FjgPcDa8Kf24F7AMysHfgMcBWwEfhMKoFUqqx7GQz2QF1LMDIog9Qid7P2G9S3wqLlcFjJQETmR8HJwMxagPcA9wG4+6S7DwCbgQfC0x4Abg7vbwa+7oFngFYzWw7cADzh7sfcvR94AthUaFzzYTQWzz77eI7+Asiw9WU6jSgSkXlUTM1gNdAHfNXMXjSzr5hZI7DU3XvDcw4CqfUYuoB9ac/vCcsylZ/EzG43s61mtrWvr6+I0IszGksUNccA5tjTICW1BaZGFInIPCgmGVQBlwP3uPtlwCjHm4QA8GCozCzDZQrj7ve6+wZ339DR0VGqy+YtaCaaqwP57ZyTQeaawVqYGtWIIhGZF8Ukgx6gx92fDR9/myA5HAqbfwhvD4fH9wPpbSfdYVmm8ork7tPzDGY1MRRsbJMlGTTXBc8fHJ+a/YRl7whuD75aaKgiIjkrOBm4+0Fgn5mFQ1+4DtgOPAqkRgTdCjwS3n8U+Gg4quhqYDBsTnocuN7M2sKO4+vDsooUiyeJJz1zMhhKbWozd5/BspY6AHoHJ2Y/4awLAYODrxQYqYhI7rJs4pvV/w1808xq
gD3AxwgSzMNmdhvwFvDr4bk/AD4A7ALGwnNx92Nm9qfAlvC8z7r7sSLjKpusexlMTzibu2bQUFNFe2MNPf3js59Q0wiLz4VDqhmISPkVlQzc/SVgwyyHrpvlXAfuyHCd+4H7i4llvqSGgmasGeQw4Sylq7We/QMZkgEETUX7X8g3RBGRvGkGcp5GpmsGGTqQB3vAosE8gSy6WuvZ3z+W+YSl64NNbiaGCglVRCRnSgZ5Ss0Yzlwz6AlmHmfY1CZdV1tQM5h1fSI43ol8aFshoYqI5EzJIE9Z9zLIYY5BSldrPRNTSY6NTs5+wtL1wa36DUSkzJQM8pS9A3lf7smgrR4gc79BcyfUt2lEkYiUnZJBnkbnqhkkEzB0IK+aAcD+TCOKzILagWoGIlJmSgZ5GglHEzXNtjbRyCFIxnNOBt3ZagYQ9Bsc2h4kGhGRMlEyyNPxmsEsHcQ5bGqTrqW+msaaaOa5BhDUDOLjcHR3vqGKiORMySBPo7E4tVURqqKzfHR5zDEAMLPpEUUZLUt1IqvfQETKR8kgT3PuZZDj7ON0Xa31c9cMOtZCpEprFIlIWSkZ5GnOReoGe6B27k1tZupqyzLxrKoWlpyvTmQRKSslgzyNzLWXwWAPtGTe6nI23W0NDE3EGZ7IsHopBP0GqhmISBkpGeRpdK69DIYOBHMD8jA9vDTbiKLhAzB6NK9ri4jkSskgT6OTczQTDffmtCZRuumJZ3P1G6gTWUTKTMkgTyOZ+gwScRg5nHfNoDuXmsFSbXQjIuWlZJCn0Vg884QzHBYty+t6S5pqqYlG5q4ZNHVA01J1IotI2SgZ5Gk0Uwfy8MHgdlF+NYNIxOhsraNnrpoBqBNZRMpKySAP7s7oZIYO5OEDwW2eNQNIDS/NkgyWXwKHt8PkaN7XFxHJRskgD2OTCdwzLFKXqhnk2WcAOex4BrDiKvAEHHgx7+uLiGSjZJCHOVcsHToQzBRuWJL3dbtaG+gbjjExNcdidN1XBrf7nsv7+iIi2SgZ5GFkrr0Mhg9C0zKI5P+RpoaX9g5OZD6pcTEsPg96tuR9fRGRbJQM8jAaLl89ezPRgYL6CyCHfQ1SujfCvmch0zaZIiIFUjLIw8hcy1cP9UJzfhPOUo7vazDHGkUAKzbC2FE4tqeg1xERyUTJIA9zbnk5fDDv2ccpy1rqiFgONYMVG4Nb9RuISIkpGeRhdDJDB/LkKMQGC04G1dEIS5tzmGvQsRZqm6FHyUBESkvJIA8ZO5CLGFaa0tWaw1yDSBS6N6hmICIlV3QyMLOomb1oZt8PH682s2fNbJeZfcvMasLy2vDxrvD4qrRr3BWWv25mNxQbU7kMjgfLTDfXVZ94YKjwCWcpWXc8S+neGEw+mxgq+LVERGYqRc3g94AdaY8/D9zt7ucB/cBtYfltQH9Yfnd4Hma2DrgFuAjYBHzJzDKsEb2wBsamqKuOUF8zI7wCl6JI19Vaz8HBCRLJLCOFVmwET8L+5wt+LRGRmYpKBmbWDfwK8JXwsQHXAt8OT3kAuDm8vzl8THj8uvD8zcBD7h5z973ALmBjMXGVy7HRSdobak4+UMRSFCldbfXEk86hoTnmGkDQTIRpvoGIlFSxNYO/Aj4FJMPHi4EBd4+Hj3uA1NZfXcA+gPD4YHj+dPksz6ko/aOTtM6aDA5CTVNe213OdHZ7IwB7+rKsPVTXAmddqH4DESmpgpOBmd0IHHb3eWuvMLPbzWyrmW3t6+ubr5ed1j82SXvjLMlgqPAJZynrOoNEsr13MPvJ3VcGI4qSyezniojkoJiawbuAm8zsTeAhguahLwCtZpYabtMN7A/v7wdWAITHW4Cj6eWzPOcE7n6vu29w9w0dHR1FhF6Y/rEpWhuqTz5QxByDlPbGGpa31PHq/hw6hldcBRODcHRnUa8pIpJScDJw97vcvdvdVxF0AP/E3T8CPAV8KDztVuCR8P6j4WPC4z9xdw/LbwlHG60G1gAV2QaSsWYwfKDo
ZABwUWcz2w7kUDOYnnz2bNGvKSIC5Zln8Gngk2a2i6BP4L6w/D5gcVj+SeBOAHffBjwMbAceA+5w9zmW71wYiaQzOD51cp+Be1AzKHApinQXdbaw58goY5PxuU9cfB7Ut6nfQERKJsPO7vlx938E/jG8v4dZRgO5+wTwaxme/zngc6WIpVwGx6dwh/aZzURjxyAxWbKagTvs6B3mirPbMp9oFi5ap2QgIqWhGcg5OjY6CUDbzGai4d7gthTJoKsFgO05NRVdCUdeh/H+ol9XRETJIEcDY2EymNlMlEoGRSxFkdLZUkdrQzXbDuTYiQzQo8lnIlI8JYMcTdcMMiWDIoeWApgZF3U282ouNYPOy8Ei6kQWkZJQMsjRwFiwLlFb48x1icJk0FR8MgBY39nCGwdHmEpkmUNQ2wRL12sFUxEpCSWDHB2bq5moYQlUzTLktADrOpuZTCTZeWgk+8krNkLPVkhW3OArETnFKBnkqH9skpqqCA0nLVJX+A5ns7moM+hEzmm+QfdGmByBwzuynysiMgclgxz1j07S1lBNsLZemuHekowkSlm9pJH66miOncjhCF41FYlIkZQMctQ/NnVyExEEfQYlTAbRiHHh8kVszyUZtK2Cxg7NNxCRoikZ5CioGcxIBokpGO0raTKAoKloe+8QyWx7G2jymYiUiJJBjmZdl2jkEOAl7TMAWN/VzEgszlvHxrKfvGIjHNsNo0dKGoOInFmUDHLUPzaVeVhpGWoGkGMn8nS/gTa7EZHCKRnkIJl0BsZmaSYq4VIU6dYsbaIqYrl1IndeBpEqNRWJSFGUDHIwNDFF0su7FEW62qooa5Yuyi0ZVNfDsouVDESkKEoGOTi+SN2MZqLhXohUQ317yV/zos5mth8YJNjyIYsVG+HAC0GHtohIAZQMctCfWopiZs1gsCeoFURK/zFe3N3CkZFJevrHs5/cfSVMjcGhV0seh4icGZQMctCfaZG6gbehdWVZXvOq1YsBeHr30ewnp1Yw3adOZBEpjJJBDvrDdYlOGlo68Da0nl2W1zx/aROLG2t4ek8OyaClG5q7YO8/lSUWETn9KRnkIJUMWtN3OYvHgj6DMtUMzIyrz13M07uPZu83MIMLPgC7noTJ0bLEIyKnNyWDHPSPTVEdNZpq03YJHewJbsuUDACuOWcxB4cmePNoDpPP1t0E8XHY+UTZ4hGR05eSQQ76Rydpbag5cZG6gbeD23Img3Pz6DdY+c5gKe0dj5YtHhE5fSkZ5KB/bJL22TqPoazJ4JwljZy1qJaf7c5hqYloFaz9FXjjcZiaKFtMInJ6UjLIQf/o1In9BRAkg0hVyWcfpzMz3nnuYp7Zcyy3+QbrNgf7G+z+SdliEpHTk5JBDmZdpG7g7WCOQbRq9ieVyDXnLubISIxdh3PY+Wz1e6CuFbY/UtaYROT0o2SQg/6xoM/gBGUcVprumnOWAOQ2xDRaHTQVvf5DiE+WOTIROZ0oGWTh7vSPTdE+cymKMk44S7eivZ6u1vrcOpEBLrwJYoOacyAieSk4GZjZCjN7ysy2m9k2M/u9sLzdzJ4ws53hbVtYbmb2RTPbZWY/N7PL0651a3j+TjO7tfi3VTpDE3ESST9x9nGZ5xikMzOuOXcxT+85mn2zG4BzfwlqFqmpSETyUkzNIA78gbuvA64G7jCzdcCdwJPuvgZ4MnwM8H5gTfhzO3APBMkD+AxwFbAR+EwqgVSCWZeiGOwBfF6SAQTzDQbGpnjt4HD2k6tq4YJN8No/QCJe/uBE5LRQcDJw9153fyG8PwzsALqAzcAD4WkPADeH9zcDX/fAM0CrmS0HbgCecPdj7t4PPAFsKjSuUkvNPj5hxdJ5GFaabnq+QS79BhCMKho/Bm/+cxmjEpHTSUn6DMxsFXAZ8Cyw1N3Dhf45CCwN73cB+9Ke1hOWZSqvCNPJIL1mMM/JoLO1nrMXN+Teb3DudVDXAs/cU97AROS0UXQyMLMm4DvAJ9z9hN1Y
PBgcn0NDd86vdbuZbTWzrX19faW67Jz6R4Plq08YWjq4DywKi0q7qc1c3nnuEp7efYSJqUT2k2sa4F2fgJ2Pw9vPlD02ETn1FZUMzKyaIBF8092/GxYfCpt/CG8Ph+X7gRVpT+8OyzKVn8Td73X3De6+oaOjo5jQc3Z8kboZNYPmrrLPMUj3K+9YzuhkgqdeO5z9ZICrfhsaz4InPwu5TFgTkTNaMaOJDLgP2OHuf5l26FEgNSLoVuCRtPKPhqOKrgYGw+akx4Hrzawt7Di+PiyrCP1jk0QjRnNd2hf/PA0rTXfNuYtZ0lTLIy8dyO0JNY3wi5+Ct/4Vdj9Z3uBE5JRXTM3gXcBvAdea2UvhzweAPwfeZ2Y7gV8OHwP8ANgD7AL+BvgdAHc/BvwpsCX8+WxYVhGOjU7R1lB98iJ185wMohHjxouX85PXDzM0keP2lpffGsT55GchmSxvgCJySiu4ncPd/wWwDIevm+V8B+7IcK37gfsLjaWcBsYmZ8wxmIShA/OeDAA2X9rJ1372Jo+/epBf27Ai+xOqauC9/y987z8Fq5ledHPZYxSRU5NmIGdxbHRGMhia3zkG6S5d0crK9gYefTnHpiKAi38dOtbCT/5M8w5EJCMlgywGxqYWdI5BOjPjpks6+dddR+gbjuX2pEgUrv0vcHQnPP/V8gYoIqcsJYMsjs1sJlrAZABBU1HS4R9+nkftYO2NcM4vwY//JGjiEhGZQclgDu4e9Bk0zkgGFg2Gli6ANUsXsXbZovyaiszgxrshGYcf/GH5ghORU5aSwRxGYnGmEk5bw4xmonnYx2Aumy/t4oW3B9h3LIe9kVPaV8N774TXvg87/r58wYnIKUnJYA4DY8EQzpOaiRaoiSjlg5cEu6vlVTsAuOYOWPqOoHYwMViGyETkVKVkMIdjs61YOrBvwZNBd1sDG1e1842n32IklscIoWg13PQFGD4YzD0QEQkpGczh4FCwsfzipjAZxCdheGHmGMz06fev5dDwBH/5ozfye2LXFXDVf4It92nPAxGZpmQwh1d6BolGjLXLmoOCof3gyYpIBlec3cZHrlrJ1362l1f359nkc90fw4qN8J3/CLufKk+AInJKUTKYw0v7Brhg6SLqa6JBwQIPK53pD29Yy+KmWu767iskctkFLaWmAX7jW7D4PHjoI9DzfPmCFJFTgpJBBsmk8/K+AS5d2Xq8sH9vcNuSw1IQ86Clvpo/vnEdr+wf5OtPv5nfk+vb4Lf+DhqXwDc/BH2vlyVGETk1KBlksOfIKMOxOJd2tx4v3PUkNC2tmJoBwI0XL+cXz+/gLx5/nd7B8fyevGgZfPR7EKmCb/wbGD5UlhhFpPIpGWTw0r4BgOM1g8lR2PkEXPjBYImHCmFm/NnN60m48/G/fYHB8RxXNE1pPwd+8zsw3g8P/xbEc1zmQkROK0oGGby0r5+m2irO7WgKCnb+COLjsO7mBY1rNivaG/jiLZex7cAgv/E3z0wPic3Z8oth8/+Efc/CDz9VniBFpKIpGWTw8r5BLu5uIRoJV+ne/gg0dsDZ71zYwDK4/qJl/M1HN7Dr8Ai33Ps0h4cn8rvA+l+Fd38Snv9aMOxURM4oSgazmJhKsKN3iEtWtAYFk2Pwxo8qrolopvdecBZf/fdXsu/YOP/ufz2Tfx/Ctf8FzntfUDt462flCVJEKpKSwSy2HRgknnQuTSWDXT+GqVFYt3lB48rFO89bwjdu20jfcIyPfXULw7nuigZBovu3X4G2VUGH8o//q5atEDlDKBnM4qV9wRfgZalksP0RaFgMZ7974YLKw4ZV7XzpI5ez8/AId/zvF5lK5LHlZX0r3Pr3QeL7l7vhC5fC019Sx7LIaU7JYBYv7Rugs6WOs5rrYGoC3ngs2BNgAVcqzdd7zu/gczev56dv9PHHj7xKsOtojpo74Vfvhd/+KSx7Bzx+F3zxMnjmy0GTmYicdpQMZvHSvv7j/QW7n4TJkVOiiWimWzau
5OPvPZcHn9vHl/9pT/4XWH4JfPQR+M3vQuvZ8Nin4a/Ww0//G4wPlDxeEVk4SgYzHB2Jse/Y+PH+gu2PBLN1V79nQeMq1B9efwE3Xryczz/2Gh/76nN878X9jOaz0qkZnHcd/Icfwsd+CJ2XB/sp330RPP5HMLi/fMGLyLw5ddo95snLPQNAsPk88Ri8/kNYd1Ow/PMpKBIx/uLXLmFlewPfe3E/n/jWS9RVR7jhomV8/L3nHl+ELxdnvzP46f05/OyL8Mw98OyX4R2/Dhs+Bt1XBslDRE45SgYzvPT2ABGD9V0t8MJXITYE6/7NQodVlLrqKJ/atJb/5/oL2PpWP4++vJ9HXjzAoy8f4IMXd/L77zuf1Usac7/g8ouDUUfX/n/wzJfgha/Dy/87WKZj/YfgHR+Cs9YpMYicQiyvjsUKsmHDBt+6dWvJr/vR+5/j8NAEj/3mcvjyL8DKq4M288jp1aI2MDbJvT/dw1f/9U0mE0k2X9rJhzeuZMPZbVi+X+ITQ/DaP8Ar/wf2/CN4AlpWwnnXwnm/HDSx1bWU5X2ISH7M7Hl333BSuZLBce7OpZ99gg9etJg/O/rJYMnqj/8MmpeX9HUqyZGRGF96ajcPbXmbsckEK9sb+NXLu7h+3TJWLWmgoSbPyuNIH7z298Gifnv+CSaHg/L6NljUGXyWbauD5qZVvwBNHaV/UyKSUcUnAzPbBHwBiAJfcfc/n+v8UicDd+e//+gN/vqpXTy2/inW7vob+Hd/G8w6PgOMxuI8vu0g33mhh5/tPkrqz6JjUS1ntzewZmkTF3W2sL6rhbXLFlFXncNM7MQU7HsO9j0TdDQP98LQATi6KxihBdBxYVD7Wn4xLLsElq6D6vryvVGRM1xFJwMziwJvAO8DeoAtwIfdfXum55QyGUwlktz13Vf49vM93HnhUX577+9il/0mbP7rklz/VHNgYJzn3+rn7WNjvHV0lDePjvHGoWEGxoLZzNGI0d1Wz4q2Bla019Pd1kBnax1Lm+tY3lLPWYtqqa+OEolkaG5KTEHvy7D3p/DmPweb68TCmc4WgeauYJ+Fxg5oWBLM70gmgh9PBseau4L5EIuWQSStcz8Sgfr24JyaJvVbiMyQKRlUSgfyRmCXu+8BMLOHgM1AxmRQKiOxOJ/8xj8zvucZHlpzhKuO/APWvho2zVkxOa11ttbT2Xri/87dnf0D47y6f4htBwbZe2SUff3j/GjbIY5mWCW1KmLUVEWojkYwAyNYcrsqYjTVVtFUt5Gm2nfS3FXFOTVHOT+5l7OndtM6eZC6yWPUHd5P3eQrmCdIWgS3KGDUxo5Slci+7lIyUk28phmP1OCRajxaDdFarLoOq6knUl2P1TaRrFmE1zSTrF0UJKNU4iEJkWo8WgtVtXi0hog5UU8QIUnUHIvWQlUNRGugqg6qG4KaTU1jsE9EmIzcHcywSHWw7IdFITkF8Ylg1Fo8FlyjOrxGtDpImqnjyUSwQ11NE9QugqraMDl6kCAj0eB5VbXHE2AyCVNjwTU8CVh4zIKkGakK4kjFY5Hc+8bcIRkPfiLV8zMhM5kIYlSCL4tKSQZdwL60xz3AVeV4oZc/fwNtsR4Mxwj+oO/xPqI1Dj2RYMbtTf8DapvK8fKnLDOju62B7rYGNq1fdsKxsck4vYMTHBqc4ODQBIeHY0xMJZiMJ5mMJ5lKJHHC7y2ceMIZicUZicUZjcXZfWSUrWMR+sdWkkjmsouc08wYy+0oZ9kAEY7XbquI02YjtDNEu43QPDlKFQmqLU41cWqJU8skdTZKPTEaiNFsYyxijHo7cR2npBsRK03Neb6+vpIYE9QSIUkdeS5lHooTIU6UKaqJU0USo5oEURJUpd2mm6KKCWqZoAbDp8+LksQxkhhJIjg2/W8vlXaSRIhbhCSR6XMIn1NNnFqPUUuMGoL5MePUErNaYuH7jBKn2uNESUy/lhMhGV4n/JeOW3DtBJHpV09FFUlF6D4dReo5iTCm
1HtIenC2pxI9hgGpdxAlgUH47tPfF9PXSY/MzDF3UldLOV5iae8ouG359KvU1jUU9PvNpFKSQU7M7HbgdoCVKwvbbWx80Sri0Vo89RViRuLsizj7smuhawPU5THuXgBoqAn2fZje+6FAyaQzNDHF2GSCRNKZSiSJJ326RhGNBP+I4kknnkwyFXcmE0nAw0QTJJzjtZDgcSLp4XOc2FSC/qkEo7EEY5Nxku5EzILr+xQRUl8awf9AzRNEkpNEk5NEkpPEPcKUG1NJYyoBnojh8UmSUzEiiQnqiAVfXh6jijgRM4LWMgNPkEgkSManSCQTJKyKhNUQj9aStGoiPkVVIkZVMkbUJ4lbNXGqmbJakhBcNzlGXXKcqE9Nx+kYERJUJSaoTk5QkxwnaVVMReqYitQSj9SFXzsefCA45kGTWyQZxzxJNOJUmxNN1Xw8Hvwkp8CTxIkSD7/e41SRIBLET4QqEtR6jDomqEnGcAuSSTI831LXDb/IkhgJh0QyiCniaV+inkx9dWOeJG7VTEXqiEdqmYrUnfA+q8PXSlo1iUg1SYsGX5YefOVb+H5T18M9vH4SCz+H6c/QjieipEWmv4jNw1TgQU0w9YlHCf7ugr+94I8vGSaOJFGcMDmEtciIJ4Mv/bCc6eRiwa/EgtSQqrkdj53pOFPxuUW4wko/urFSksF+IP2/hN1h2Qnc/V7gXgj6DAp5oat/538V8jSZB5GI0dpQQ2tp/8MjIjmolMHzW4A1ZrbazGqAW4BHFzgmEZEzRkXUDNw9bmb/GXicYGjp/e6+bYHDEhE5Y1REMgBw9x8AP1joOEREzkSV0kwkIiILSMlARESUDERERMlARERQMhARESpkobpCmFkf8FaBT18CHClhOKVSqXFB5cZWqXGBYitEpcYFlRtbvnGd7e4nrR1/yiaDYpjZ1tlW7VtolRoXVG5slRoXKLZCVGpcULmxlSouNROJiIiSgYiInLnJ4N6FDiCDSo0LKje2So0LFFshKjUuqNzYShLXGdlnICIiJzpTawYiIpLmjEoGZrbJzF43s11mducCx3K/mR02s1fTytrN7Akz2xneti1AXCvM7Ckz225m28zs9yootjoze87MXg5j+5OwfLWZPRv+Xr8VLoM+78wsamYvmtn3KyyuN83sFTN7ycy2hmUL/vsM42g1s2+b2WtmtsPMrlno2MzsgvCzSv0MmdknFjqutPh+P/z7f9XMHgz/XRT9t3bGJAMziwL/E3g/sA74sJmtW8CQvgZsmlF2J/Cku68Bngwfz7c48Afuvg64Grgj/JwqIbYYcK27XwJcCmwys6uBzwN3u/t5QD9w2wLEBvB7wI60x5USF8AvufulaUMQK+H3CfAF4DF3XwtcQvD5LWhs7v56+FldClwBjAF/t9BxAZhZF/C7wAZ3X0+w5P8tlOJvzd3PiB/gGuDxtMd3AXctcEyrgFfTHr8OLA/vLwder4DP7RHgfZUWG9AAvECwV/YRoGq23/M8xtNN8AVxLfB9gp03Fzyu8LXfBJbMKFvw3yfQAuwl7LuspNjSYrke+NdKiYvj+8W3E2xB8H3ghlL8rZ0xNQOOf4gpPWFZJVnq7r3h/YPA0oUMxsxWAZcBz1IhsYVNMS8Bh4EngN3AgLvHw1MW6vf6V8CngGT4eHGFxAXBZr0/MrPnw33EoTJ+n6uBPuCrYfPaV8yssUJiS7kFeDC8v+Bxuft+4C+At4FeYBB4nhL8rZ1JyeCU4kGKX7ChXmbWBHwH+IS7D6UfW8jY3D3hQfW9G9gIrF2IONKZ2Y3AYXd/fqFjyeDd7n45QRPpHWb2nvSDC/j7rAIuB+5x98uAUWY0vSzk31rY7n4T8H9mHluouMJ+is0EibQTaOTk5uaCnEnJYD+wIu1xd1hWSQ6Z2XKA8PbwQgRhZtUEieCb7v7dSootxd0HgKcIqsStZpbatW8hfq/vAm4yszeBhwiair5QAXEB0/+bxN0PE7R9b6Qyfp89QI+7
Pxs+/jZBcqiE2CBIni+4+6HwcSXE9cvAXnfvc/cp4LsEf39F/62dSclgC7Am7HWvIaj+PbrAMc30KHBreP9Wgvb6eWVmBtwH7HD3v6yw2DrMrDW8X0/Ql7GDICl8aKFic/e73L3b3VcR/F39xN0/stBxAZhZo5ktSt0naAN/lQr4fbr7QWCfmV0QFl0HbK+E2EIf5ngTEVRGXG8DV5tZQ/hvNfWZFf+3tlAdMwvxA3wAeIOgnfmPFjiWBwna/KYI/od0G0E785PATuDHQPsCxPVugurvz4GXwp8PVEhsFwMvhrG9CvxxWH4O8Bywi6BKX7uAv9f3At+vlLjCGF4Of7al/u4r4fcZxnEpsDX8nX4PaKuE2AiaX44CLWllCx5XGMefAK+F/wa+AdSW4m9NM5BFROSMaiYSEZEMlAxERETJQERElAxERAQlAxERQclARERQMhAREZQMREQE+P8BkVrWZQOkd2UAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "# Compare the sentence-length distributions of the two sides of the corpus.\n",
    "plt.plot(s1, s2, label='src (space-separated tokens)')\n",
    "plt.plot(t1, t2, label='tgt (characters)')\n",
    "plt.xlabel('sequence length (tokens)')\n",
    "plt.ylabel('number of sentences')\n",
    "plt.legend();  # trailing semicolon keeps the return-value repr out of the cell output"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8fef6de5",
   "metadata": {},
   "source": [
    "### 构建词表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "93e37df7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter  #计数类\n",
    "\n",
    "flatten = lambda l: [item for sublist in l for item in sublist]  #展平数组\n",
    "\n",
    "# 构建词表\n",
    "class Vocab:\n",
    "    def __init__(self, tokens):\n",
    "        self.tokens = tokens  # 传入的tokens是二维列表\n",
    "        self.token2index = {'<bos>': 0, '<eos>': 1, '<unk>':2, '<pad>':3}  # 先存好特殊词元\n",
    "        # 将词元按词频排序后生成列表\n",
    "        self.token2index.update({\n",
    "            token: index + 4\n",
    "            for index, (token, freq) in enumerate(\n",
    "                sorted(Counter(flatten(self.tokens)).items(), key=lambda x: x[1], reverse=True))\n",
    "        }) \n",
    "        #构建id到词元字典\n",
    "        self.index2token = {index: token for token, index in self.token2index.items()}\n",
    " \n",
    "    def __getitem__(self, query):\n",
    "        # 单一索引\n",
    "        if isinstance(query, (str, int)):\n",
    "            if isinstance(query, str):\n",
    "                return self.token2index.get(query, 0)\n",
    "            elif isinstance(query, (int)):\n",
    "                return self.index2token.get(query, '<unk>')\n",
    "        # 数组索引\n",
    "        elif isinstance(query, (list, tuple)):\n",
    "            return [self.__getitem__(item) for item in query]\n",
    " \n",
    "    def __len__(self):\n",
    "        return len(self.index2token)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dd7d8cec",
   "metadata": {},
   "source": [
    "### 构建数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "8c200c70",
   "metadata": {},
   "outputs": [],
   "source": [
    "from torch.utils.data import DataLoader, TensorDataset\n",
    "\n",
    "seq_len = 48  # 序列最大长度\n",
    "\n",
    "# 对数据按照最大长度进行截断和填充\n",
    "def padding(tokens, seq_len):\n",
    "    # 该函数针对单个句子进行处理\n",
    "    # 传入的句子是词元形式\n",
    "    return tokens[:seq_len] if len(tokens) > seq_len else tokens + ['<pad>'] * (seq_len - len(tokens))\n",
    "\n",
    "# Build the source and target vocabularies.\n",
    "src_vocab, tgt_vocab = Vocab(src_tokens), Vocab(tgt_tokens)\n",
    "\n",
    "# Append '<eos>' to every sentence and map the tokens to ids. Each sentence is first\n",
    "# cut to seq_len - 1 tokens so that '<eos>' still fits: padding() truncates to\n",
    "# seq_len and would otherwise drop the '<eos>' of long sentences.\n",
    "src_data = torch.tensor([src_vocab[padding(line[:seq_len - 1] + ['<eos>'], seq_len)] for line in src_tokens])\n",
    "tgt_data = torch.tensor([tgt_vocab[padding(line[:seq_len - 1] + ['<eos>'], seq_len)] for line in tgt_tokens])\n",
    "\n",
    "# 80/20 train/test split in corpus order; the training loader uses batch_size = 256,\n",
    "# the test loader yields one sentence pair at a time.\n",
    "train_size = int(len(src_data) * 0.8)\n",
    "test_size = len(src_data) - train_size\n",
    "batch_size = 256\n",
    "\n",
    "train_loader = DataLoader(TensorDataset(src_data[:train_size], tgt_data[:train_size]), batch_size=batch_size)\n",
    "test_loader = DataLoader(TensorDataset(src_data[-test_size:], tgt_data[-test_size:]), batch_size=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5efd98ec",
   "metadata": {},
   "source": [
    "## 模型训练"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "304307b6",
   "metadata": {},
   "source": [
    "### 模型定义"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "5975b323",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 定义编码器\n",
    "class Encoder(nn.Module):\n",
    " \n",
    "    def __init__(self, vocab_size, ebd_size, hidden_size, num_layers):\n",
    "        super().__init__()\n",
    "        self.embedding = nn.Embedding(vocab_size, ebd_size, padding_idx=3)  # 将token表示为embedding\n",
    "        self.gru = nn.GRU(ebd_size, hidden_size, num_layers=num_layers)\n",
    " \n",
    "    def forward(self, encoder_inputs):\n",
    "        # encoder_inputs从(batch_size, seq_len)变成(batch_size, seq_len, emb_size)再调整为(seq_len, batch_size, emb_size)\n",
    "        encoder_inputs = self.embedding(encoder_inputs).permute(1, 0, 2)\n",
    "        output, hidden = self.gru(encoder_inputs)\n",
    "        # hidden 的形状为 (num_layers, batch_size, hidden_size)\n",
    "        # 最后时刻的最后一个隐层的输出的隐状态即为上下文向量\n",
    "        return hidden\n",
    "\n",
    "# 定义解码器\n",
    "class Decoder(nn.Module):\n",
    " \n",
    "    def __init__(self, vocab_size, ebd_size, hidden_size, num_layers):\n",
    "        super().__init__()\n",
    "        self.embedding = nn.Embedding(vocab_size, ebd_size, padding_idx=3)\n",
    "        # 拼接维度ebd_size + hidden_size\n",
    "        self.gru = nn.GRU(ebd_size + hidden_size, hidden_size, num_layers=num_layers)\n",
    "        self.linear = nn.Linear(hidden_size, vocab_size)\n",
    " \n",
    "    def forward(self, decoder_inputs, encoder_states):\n",
    "        '''\n",
    "            decoder_inputs 为目标序列偏移一位的结果, 由初始形状: (batch_size, seq_len)变为(batch_size, seq_len)\n",
    "            再调整为(batch_size, seq_len, emb_size) -> (seq_len, batch_size, emb_size)\n",
    "        '''\n",
    "        decoder_inputs = self.embedding(decoder_inputs).permute(1, 0, 2)\n",
    "        context = encoder_states[-1] # 上下文向量取编码器的最后一个隐层的输出\n",
    "        # context 初始形状为 (batch_size, hidden_size)，为下一步连接，需repeat为(seq_len, batch_size, hidden_size)形式 \n",
    "        context = context.repeat(decoder_inputs.shape[0], 1, 1)\n",
    "        output, hidden = self.gru(torch.cat((decoder_inputs, context), -1), encoder_states)\n",
    "        # logits 的形状为 (seq_len, batch_size, vocab_size)\n",
    "        logits = self.linear(output)\n",
    "        return logits, hidden\n",
    "\n",
    "# seq2seq模型\n",
    "class Seq2Seq(nn.Module):\n",
    " \n",
    "    def __init__(self, encoder, decoder):\n",
    "        super().__init__()\n",
    "        self.encoder = encoder\n",
    "        self.decoder = decoder\n",
    " \n",
    "    def forward(self, encoder_inputs, decoder_inputs):\n",
    "        return self.decoder(decoder_inputs, self.encoder(encoder_inputs))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fbabc157",
   "metadata": {},
   "source": [
    "### 模型训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "890d4b46",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 50/50 [1:36:32<00:00, 115.86s/it]  \n"
     ]
    }
   ],
   "source": [
    "from tqdm import *\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Use the GPU when one is available\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
    "\n",
    "# Hyperparameters\n",
    "lr = 0.001\n",
    "num_epochs = 50\n",
    "hidden_size = 256\n",
    "\n",
    "# Build the model\n",
    "# NOTE(review): the second argument is ebd_size, so the embedding width equals the\n",
    "# vocabulary size here - confirm this is intended rather than a smaller embedding size.\n",
    "encoder = Encoder(len(src_vocab), len(src_vocab), hidden_size, num_layers=2)\n",
    "decoder = Decoder(len(tgt_vocab), len(tgt_vocab), hidden_size, num_layers=2)\n",
    "model = Seq2Seq(encoder, decoder)\n",
    "model.to(device)\n",
    "\n",
    "# Cross-entropy loss (unreduced; targets equal to 3, the '<pad>' id, are ignored) and Adam optimizer\n",
    "criterion = nn.CrossEntropyLoss(reduction='none', ignore_index =3)\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n",
    "\n",
    "# Loss value of every training step\n",
    "loss_history = []\n",
    "\n",
    "# Training loop\n",
    "model.train()\n",
    "for epoch in tqdm(range(num_epochs)):\n",
    "    for encoder_inputs, decoder_targets in train_loader:\n",
    "        encoder_inputs, decoder_targets = encoder_inputs.to(device), decoder_targets.to(device)\n",
    "        # Teacher forcing: the decoder input is the target shifted right by one,\n",
    "        # with a <bos> column in front\n",
    "        bos_column = torch.full(\n",
    "            (decoder_targets.shape[0], 1), tgt_vocab['<bos>'],\n",
    "            dtype=decoder_targets.dtype, device=device)\n",
    "        decoder_inputs = torch.cat((bos_column, decoder_targets[:, :-1]), dim=1)\n",
    "        # pred has shape (seq_len, batch_size, vocab_size)\n",
    "        pred, _ = model(encoder_inputs, decoder_inputs)\n",
    "        # CrossEntropyLoss takes (batch_size, vocab_size, seq_len) against targets of shape\n",
    "        # (batch_size, seq_len). With reduction='none' it returns one loss per token,\n",
    "        # which is 0 at ignored (padding) positions.\n",
    "        token_loss = criterion(pred.permute(1, 2, 0), decoder_targets)\n",
    "        # Average over the real tokens only: a plain .mean() would also divide by the\n",
    "        # padding positions and scale the loss by each batch's padding ratio.\n",
    "        num_tokens = (decoder_targets != criterion.ignore_index).sum().clamp(min=1)\n",
    "        loss = token_loss.sum() / num_tokens\n",
    "\n",
    "        # Backpropagation and parameter update\n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        loss_history.append(loss.item())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "bb0a2c6a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGdCAYAAADuR1K7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAABfR0lEQVR4nO3dd3gU1foH8O+mJ5CEmpBQgyAt1ID0JhAERFGvFcWuKIiCiKJyLVfFq15FlGKFnyKKGlRURANSpJeEHnogISQECCkESJ3fHyFLdnd25uxmZmd38/08Tx7N7tmZs5OQefec97zHJEmSBCIiIiIv4WN0B4iIiIi0xOCGiIiIvAqDGyIiIvIqDG6IiIjIqzC4ISIiIq/C4IaIiIi8CoMbIiIi8ioMboiIiMir+BndAVcrLy/HqVOnEBoaCpPJZHR3iIiISIAkSSgoKEB0dDR8fJTHZmpccHPq1Ck0bdrU6G4QERGRE9LT09GkSRPFNjUuuAkNDQVQcXHCwsIM7g0RERGJyM/PR9OmTc33cSU1LripnIoKCwtjcENERORhRFJKmFBMREREXoXBDREREXkVBjdERETkVRjcEBERkVdhcENERERehcENEREReRUGN0RERORVGNwQERGRV2FwQ0RERF6FwQ0RERF5FQY3RERE5FUY3BAREZFXYXCjoT/2ZOLPfVlGd4OIiKhGMzS4mTlzJnr06IHQ0FBERERgzJgxOHjwoOJr1qxZA5PJZPN14MABF/VaXv7lEjzxTRIe/3oHLpeUGdoXIiKimszQ4Gbt2rWYMGECNm/ejMTERJSWliI+Ph6FhYWqrz148CAyMzPNX61bt3ZBj+27WHQ1oCkuKzewJ0RERDWbn5EnX7FihcX3CxYsQEREBHbs2IEBAwYovjYiIgJ16tTRsXdERETkidwq5yYvLw8AUK9ePdW2Xbt2RVRUFIYMGYLVq1fr3TWHSJLRPSAiIqq5DB25qUqSJEyZMgX9+vVDbGys3XZRUVH49NNPERcXh6KiInz99dcYMmQI1qxZIzvaU1RUhKKiIvP3+fn5uvSfiIiI3IPbBDcTJ07E7t27sX79esV2bdq0QZs2bczf9+7dG+np6Xjvvfdkg5uZM2fitdde07y/Skwml56OiIiIqnCLaamnnnoKy5Ytw+rVq9GkSROHX9+rVy8cPnxY9rnp06cjLy/P/JWenl7d7qritBQREZFxDB25kSQJTz31FH766SesWbMGMTExTh0nOTkZUVFRss8FBgYiMDCwOt0UwtEaIiIi92BocDNhwgQsXrwYv/zyC0JDQ5GVVVEALzw8HMHBwQAqRl4yMjLw1VdfAQBmzZqFFi1aoEOHDiguLsaiRYuQkJCAhIQEw94HwNEaIiIid2FocDNv3jwAwKBBgyweX7BgAR544AEAQGZmJtLS0szPFRcXY+rUqcjIyEBwcDA6dOiA33//HSNHjnRVt1VxFIeIiMg4JkmqWWMO+fn5CA8PR15eHsLCwjQ7blbeZfSauQoAsOuVeIQH+2t2bCIioprOkfu3WyQUewOO1hAREbkHBjcaqVnjX0RERO6LwQ0RERF5FQY3GuG0FBERkXtgcENERERehcENEREReRUGN0RERORVGNwQERGRV2FwowcuCyciIjIMgxsiIiLyKgxu9MBl4URERIZhcKMHTksREREZhsGNRjhYQ0RE5B4Y3GiEgzVERETugcGNHjiMQ0REZBgGN3rgMA4REZFhGNxohIM1RERE7oHBjUY4WENEROQeGNzo5Ptt6Rj7+WbkXSoxuitEREQ1CoMbjVhPS01L2I0NR85h3pqjhvSHiIiopmJwo7OCyxy5ISIiciUGNzqQmIFDRERkGAY3RERE5FUY3BAREZFXYXCjA4mzUkRERIZhcKMVVvEjIiJyCwxutGJntMbEoIeIiMilGNzo7Nut6UZ3gYiIqEZhcKODqoM4ZeUSUs8WGtYXIiKimobBjVYU
pp9yCotd1w8iIqIajsGNVhRXSHH5FBERkaswuNGBxLXgREREhmFwoxWuiiIiInILDG5cgAM5RERErsPgRiN1ggPM/89YhoiIyDgMbjQS4Gf/UjLYISIich0GN0RERORVGNzogDk2RERExmFwoyHuI0VERGQ8BjdERETkVRjc6ECySiHmNBUREZHrMLjREGeliIiIjMfgxgW4HQMREZHrMLghIiIir8LgRg8cqCEiIjIMgxsNmbgWnIiIyHAMboiIiMirMLjRgfWsFGepiIiIXIfBjYY4KUVERGQ8BjcuwJXgRERErsPgRgcMZoiIiIzD4EZD9hZLWW/HQERERPphcENERERehcGNDqxHakxMNSYiInIZBjcashfEcFqKiIjIdRjcEBERkVdhcKMDm9VSHLghIiJyGUODm5kzZ6JHjx4IDQ1FREQExowZg4MHD6q+bu3atYiLi0NQUBBatmyJ+fPnu6C3AphaQ0REZDhDg5u1a9diwoQJ2Lx5MxITE1FaWor4+HgUFhbafU1qaipGjhyJ/v37Izk5GS+++CImTZqEhIQEF/bcMRy4ISIich0/I0++YsUKi+8XLFiAiIgI7NixAwMGDJB9zfz589GsWTPMmjULANCuXTts374d7733Hm677Ta9u0xERERuzq1ybvLy8gAA9erVs9tm06ZNiI+Pt3hs+PDh2L59O0pKSmzaFxUVIT8/3+JLbxypISIiMo7bBDeSJGHKlCno168fYmNj7bbLyspCZGSkxWORkZEoLS3F2bNnbdrPnDkT4eHh5q+mTZtq3vdKTLkhIiIyntsENxMnTsTu3bvx7bffqrY1We1zIF1ZnmT9OABMnz4deXl55q/09HRtOuwA7jVFRETkOobm3FR66qmnsGzZMqxbtw5NmjRRbNuoUSNkZWVZPJadnQ0/Pz/Ur1/fpn1gYCACAwM17a8aidEMERGRYQwduZEkCRMnTsTSpUvx999/IyYmRvU1vXv3RmJiosVjf/31F7p37w5/f3+9uiqEG2cSEREZz9DgZsKECVi0aBEWL16M0NBQZGVlISsrC5cuXTK3mT59OsaNG2f+fvz48Thx4gSmTJmClJQUfPnll/jiiy8wdepUI94CERERuRlDg5t58+YhLy8PgwYNQlRUlPlryZIl5jaZmZlIS0szfx8TE4Ply5djzZo16NKlC/7zn/9g9uzZbrUM3HpWirNURERErmNozo1IbsrChQttHhs4cCCSkpJ06FH1cPdvIiIi47nNaikiIiIiLTC40dClkjKju0BERFTjMbjRwbbjORbfM+WGiIjIdRjc6ODg6QKju0BERFRjMbhxARb1IyIich0GN0RERORVGNzowKbOjZ12mXmX8M2WEzh25oLufSIiIqop3GJvqZqq98y/zf+/9aUhiAgNMrA3RERE3oEjN27i8GmO3hAREWmBwY0OnEkgZs4xERGRNhjc6ICBChERkXEY3LgCgx0iIiKXYXCjA2diGYkREBERkSYY3LgAAxciIiLXYXCjA+ucm30Z+cZ0hIiIqAZicKMD65Ga/yUeMqgnRERENQ+DGzfBFVZERETaYHCjAwYqRERExmFwo4NyZ4r46dAPIiKimojBjQ5KyhiqEBERGYXBjQ7Ky53ZfoEBERERkRYY3BAREZFXYXCjA2dyboiIiEgbDG6IiIjIqzC40QHHbYiIiIzD4EYHzsxKMSAiIiLSBoMbHXCjTCIiIuMwuNFBZFiQ4y9iPERERKQJBjc66Nwk3OguEBER1VgMbjTUo0VdANxbioiIyEgMbjRkggmA9jNMe07mYVXKaY2PSkRE5J38jO6AV6mIbfDkN0kOv1QpCXn0x+sBACunDECriFCnukZERFRTcORGQyadj3/87EWdz0BEROT5GNxoyFSN6EYkT4epPEREROoY3GgoPeeS5sfcm5Gn+TGJiIi8GYMbDWXkOh/c2Bu5ufGj9VXacOyGiIhIDYMbIiIi8ioMbjwIx22IiIjUMbgh
IiIir8Lgxk1wVIaIiEgbDG7cBJOFiYiItMHghoiIiLwKgxs3ITJuw8EdIiIidQxuiIiIyKswuCEiIiKvwuCGiIiIvAqDGzchlk/DpBsiIiI1DG68xMw/UnDXp5tQUlZudFeIiIgMxeDGbaiPyiiN7nyy9hg2H8vBqpTTGvaJiIjI8zC48TIlZZy6IiKimo3BjZuYv/aYahuhWjjV7woREZFHY3DjJnam5zr9Wm7dQEREdBWDGy8w5ftd5v9noENERDUdgxsv8FNyhtFdICIichsMbjwIB2WIiIjUMbghIiIir2JocLNu3TqMHj0a0dHRMJlM+PnnnxXbr1mzBiaTyebrwIEDrumwwSSuhSIiIlJlaHBTWFiIzp074+OPP3bodQcPHkRmZqb5q3Xr1jr1UDvnC4uN7gIREVGN4GfkyUeMGIERI0Y4/LqIiAjUqVNH+w7paNbKQ3jt5ljdz6OUl1NeLqGotBzBAb6694OIiMgoHplz07VrV0RFRWHIkCFYvXq1YtuioiLk5+dbfBmhoKi02seobkLxLfM2ot2/V+DchaJq94WIiMhdORzcrFixAuvXrzd/P2fOHHTp0gX33HMPzp8/r2nnrEVFReHTTz9FQkICli5dijZt2mDIkCFYt26d3dfMnDkT4eHh5q+mTZvq2kd7zhQYH1DsulIocPXBM8Z2hIiISEcOBzfPPfecefRjz549ePbZZzFy5EgcO3YMU6ZM0byDVbVp0waPPvoounXrht69e2Pu3LkYNWoU3nvvPbuvmT59OvLy8sxf6enpuvbRntP5l6t9DK3SiVnoj4iIvJnDOTepqalo3749ACAhIQE33ngj3nrrLSQlJWHkyJGad1BNr169sGjRIrvPBwYGIjAw0IU9MhZXVBERUU3n8MhNQEAALl68CABYuXIl4uPjAQD16tUzJJ8lOTkZUVFRLj+vEbQacWH4Q0RE3szhkZt+/fphypQp6Nu3L7Zu3YolS5YAAA4dOoQmTZo4dKwLFy7gyJEj5u9TU1Oxc+dO1KtXD82aNcP06dORkZGBr776CgAwa9YstGjRAh06dEBxcTEWLVqEhIQEJCQkOPo2vJa9+IdTUUREVFM4HNx8/PHHePLJJ/Hjjz9i3rx5aNy4MQDgjz/+wA033ODQsbZv347Bgwebv6/M2bn//vuxcOFCZGZmIi0tzfx8cXExpk6dioyMDAQHB6NDhw74/fffDZkO8zRTf9htdBeIiIhcwuHgplmzZvjtt99sHv/ggw8cPvmgQYMURxQWLlxo8f20adMwbdo0h89DQELSSaO7QERE5BIO59wkJSVhz5495u9/+eUXjBkzBi+++CKKi1mFV01pWTlKy8qN7QRnqIiIyIs5HNw8/vjjOHToEADg2LFjuOuuuxASEoIffviBoyoKTDChvFzCoPfWoP87q1FW7niEIZI2w9QaIiKq6RwObg4dOoQuXboAAH744QcMGDAAixcvxsKFC5nYq6LgcilOnr+EzLzLTlUJ/ujvw5r0g8vFiYjImzkc3EiShPLyimmVlStXmpN5mzZtirNnz2rbOy+iRUBx9EyhwHmqJ7vgMvaczKvmUYiIiIzjcHDTvXt3vPHGG/j666+xdu1ajBo1CkDFMu7IyEjNO+hNXDFiUt0l39e9uQqjP16PlExj9uAiIiKqLoeDm1mzZiEpKQkTJ07ESy+9hFatWgEAfvzxR/Tp00fzDpIxtp/Qd58wIiIivTi8FLxTp04Wq6Uqvfvuu/D19dWkU96oqNTgFVJVCA3uMDOZiIg8lMPBTaUdO3YgJSUFJpMJ7dq1Q7du3bTsl9c5ce6i5QMmY/pBRETk7RwObrKzs3HnnXdi7dq1qFOnDiRJQl5eHgYPHozvvvsODRs21KOfXsEVgyFCgzJ2Hq+aZ8NxGyIi8lQO59w89dRTKCgowL59+5CTk4Pz589j7969yM/Px6RJk/ToIzmiGlHJiA//0a4fREREBnE4uFmxYgXmzZuHdu3a
mR9r37495syZgz/++EPTznmaZ4a2NroLRERENZ7DwU15eTn8/f1tHvf39zfXv6mpfE3iiTQmnZJutFpuznxiIiLyVA4HN9dffz2efvppnDp1yvxYRkYGJk+ejCFDhmjaOW82falxu3RXN3CRJAlFpWXadIaIiEhjDgc3H3/8MQoKCtCiRQtcc801aNWqFWJiYlBQUICPPvpIjz56DAcGbrAyJVu/jmhAqRjgY1/vQNsZK5BdcNmFPSIiIhLj8Gqppk2bIikpCYmJiThw4AAkSUL79u0xdOhQPfrnVVwx0yO0uWY1e5K4/zQA4JfkU3h0QMtqHYuIiEhrTte5GTZsGIYNG6ZlX8iNiC0pZ2IOERG5H6HgZvbs2cIHrMnLwU0q81K70nN178NXm07gruuaKbZhsjAREXkzoeDmgw8+EDqYyWSq0cGNmgcXbtP9HPt13vAy9ezVnckZJBERkTsSCm5SU1P17gd5iCH/W2N0F4iIiBQ5vFqK7GtRv5bRXRBSnX0zyzlaQ0REbo7BjYZGdmxkdBfEaDSfxDiHiIjcEYMbDaklFKu5XOI9hfH2nMzDN1tOKNbLISIi0gODGzcyf+1Rl5ynOjuHW7RRaDT64/V46ae9+HNflmi3iIiINMHgxo38scf7AoFDpy8Y3QUiIqphnCril5ubi61btyI7O9tms8xx48Zp0rGa6FxhsdFdMNNqOomzUkRE5GoOBze//vorxo4di8LCQoSGhlrkmZhMJgY3HkDvgGPj0bNXz8W0YyIicjGHp6WeffZZPPTQQygoKEBubi7Onz9v/srJydGjjzXOOysO4NVl+3Q7/ica5fbYC1zu+WyLJscnIiJyhsPBTUZGBiZNmoSQkBA9+lOjmUxAcWk55q45ioUbj+NU7iVdznMqT5vdvKs7ArRwQyru+GQTLhSVatIfIiIiwIngZvjw4di+fbsefanxJMlyNKSkrFyhted79df92Jqagy/XswI2ERFpx+Gcm1GjRuG5557D/v370bFjR/j7+1s8f9NNN2nWuZrIXRJwteqHyHEuFntPfR8iIjKew8HNo48+CgB4/fXXbZ4zmUwoK6vZN6r3bu+MqT/sMrob1aZVIrCbxGpERFSDOBzcWC/9JkttIkOdfm01CxxrypUjSFxRRUREWmIRPzdmghtFO84SiZIY2xARkYaERm5mz56Nxx57DEFBQZg9e7Zi20mTJmnSsZrIOg7458gZYzoiqDqF/tJzLgq1238qHztO5GBsz+bw8fGCYI+IiHQnFNx88MEHGDt2LIKCgvDBBx/YbWcymRjcaOiln/Yadu7q7i2l5uY5G4TajZz9DwAgyN8Xt3dv6vwJiYioxhAKblJTU2X/n2xVJ2/GnXJuRFRnA84cB7ea2HcqH7c79AoiIqqpmHPjRkxwn6XgrtqhHGDKDRERacupjTNPnjyJZcuWIS0tDcXFlp/A33//fU06VhOJ3OTLyyWX5J7kXizR5DhC+cQCjbTayJOIiLyfw8HNqlWrcNNNNyEmJgYHDx5EbGwsjh8/DkmS0K1bNz36WGOI3L/fTzyEqcPb6N8ZAYw3iIjIHTk8LTV9+nQ8++yz2Lt3L4KCgpCQkID09HQMHDgQt9/OrAi982Y+Wee66SI1etenWbn/tFC7lMx8LNyQirJyRltEROTEyE1KSgq+/fbbihf7+eHSpUuoXbs2Xn/9ddx888144oknNO9kTXH2QpFwwLArPRcXi8vQ+5r6OveqekTej70RoEe+urqHmdJRRnxYsaLK19cH9/Vq7kj3iIjICzk8clOrVi0UFRUBAKKjo3H06NWRhLNnz2rXMw/lqsJ7N8/ZgLs/24yzF4pccj5nuXLqau/JPNedjIiI3JbDIze9evXChg0b0L59e4waNQrPPvss9uzZg6VLl6JXr1569LFGWXvQscJ9RgY3mm2u6cJzERGR93M4uHn//fdx4cIFAMCrr76KCxcuYMmSJWjVqpVigb+aorSae2898U2S
Rj3xLkLTW1xUTkREcDC4KSsrQ3p6Ojp16gQACAkJwdy5c3XpmKc6ef6S0V1wmX2n8lXbuDLcUBvdKSuX4MstHIiIvJ5DOTe+vr4YPnw4cnNzdeqO53PFzdNdar6sTFFfzSRW58b5PsxedVio3TsrDqDjq3/i+NlC509GREQeweGE4o4dO+LYsWN69MUrBPv76np864Rlb9g5vDorqt5PPCR0jrlrjuJicRk+WCnWnoiIPJfDwc2bb76JqVOn4rfffkNmZiby8/Mtvmq64AB9gxsJksWN/sQ59x6JcGUeDBOTiYgIcCKh+IYbbgAA3HTTTTBVqVgnSRJMJhPKysq06x3Jqnp/fuzrHTbPZ+ReQuM6wa7rkAtoFbgwtiEi8n4OBzerV6/Wox8kSGQaal9GnvsENzrn3BAREVlzOLiJiYlB06ZNLUZtgIqRm/T0dM065qlccaNWSyg+ce4iACA7/zIulZShef1a+nfKje3NECvut/pANn5KzsB/xsQiPNhf514REZFeHM65iYmJwZkztoXmcnJyEBMTo0mnyD6RHJY3l6cAAK57axUGvrsG5wuLVV6hn+S0XE2OIzblJN/oxo/WVzmO/QM9uHAblu06hQ8Ek5SJiMg9ORzcVObWWLtw4QKCgoI06ZQna1JX/+kgRweHjhuYdLz1eI5qG7Gl7doMiYkcJTOv5tQqIiLyRsLTUlOmTAEAmEwmzJgxAyEhIebnysrKsGXLFnTp0kXzDnqa6DrBWPxIT9zz+RZdju8NS7+tabbKSavtIJgDRETk0YSDm+TkZAAVn7L37NmDgIAA83MBAQHo3Lkzpk6dqn0PPVCfVg0QEuCLi8XarxyzXgpOVwldFq6oIiLyesLBTeUqqQcffBAffvghwsLCdOuUN9BrfKWkjLdee6pTufnHHSeF2u05mYc/9mZi4vWtEBLgcD4+ERG5gMM5NwsWLNAssFm3bh1Gjx6N6OhomEwm/Pzzz6qvWbt2LeLi4hAUFISWLVti/vz5mvTFkzhaGE8uR2prqnoujKusPaS+E7pI3CL3PkVN/WGXULvRH6/H3DVH8eFKsW0fiIjI9RwObrRUWFiIzp074+OPPxZqn5qaipEjR6J///5ITk7Giy++iEmTJiEhIUHnnrqXBRuOO9T+dP5lm8fcKWm2cum6ErEtGrTZOVwkkErJKlBvREREhjB0XH3EiBEYMWKEcPv58+ejWbNmmDVrFgCgXbt22L59O9577z3cdtttOvXS/bz9xwGH2j8uU8W40vfb0pGRewmTh11b3W4ZTrvtF7QJpIiIyBiGjtw4atOmTYiPj7d4bPjw4di+fTtKSkoM6pVnKiyqSHaelrAbH646LFzozp1ptQO5vTZpAiNMAJC4/zTu+2KL7IgZERHpz6OCm6ysLERGRlo8FhkZidLSUpw9e1b2NUVFRYZs7lmd/A9XePuPFIvv8y+7d3DoDgMl934htrz/0a+245/DZ/Hqsn0694iIiOR4VHAD2AYNldMD9oKJmTNnIjw83PzVtGlT3fvoCfIvlxrdBYccPXPB6C4gLUds5KbSuQvGVYYmIqrJPCq4adSoEbKysiwey87Ohp+fH+rXry/7munTpyMvL8/8xf2v5MkVB3SnvJKktFyUlSv3R6zOn0jSscBxqrEdBADkFBbjz31ZKCkrFzgbERE5wqOCm969eyMxMdHisb/++gvdu3eHv7/8RoeBgYEICwuz+HIFoyeltAhM1h+Rn+ozSmm5ciAgtFpKKOdG/1VXN89Zj8e/3oFP1x1T7xARETnE0ODmwoUL2LlzJ3bu3AmgYqn3zp07kZaWBqBi1GXcuHHm9uPHj8eJEycwZcoUpKSk4Msvv8QXX3zBysg6OZXrPsvFXak6IzffbxcbGUzPqbi2K/ZmqbQkIiJHGboUfPv27Rg8eLD5+8r9q+6//34sXLgQmZmZ5kAHqNiRfPny5Zg8eTLmzJmD6OhozJ49u0YtAxfl6MDNos0n9OmIC7nD
JNq0H3c71N7RgoxERKTO0OBm0KBBilMACxcutHls4MCBSEpK0rFXNdPvezLtPpd6thA5hcWIa17XhT2ypcWmoa4MJbTI3Skrl+DrY/QkJxGRZ/GonBsSp8VN/LfdFQHP4PfW4LZ5G4XrvBjl9932A7RKetfCsWwj3yinUGwV1ZfrU9H+3yuQnHZeqD0REVVgcOOl5q89Wu1j/HPYMqH4yBn333KgsEhtibtGq6WqET4+/V2yULvXf9uPotJyh6e6iIhqOgY3ejF4JuHdPw+65DyioxCuUuqiXdPFloLLqxo0Vuc4REQkj8ENCZPLeVm2M0OTY7tTTR2t+qJV4KLUn0vFZdiamqNaA4iIqCZhcEPCjF7ZI7SjhUqblSnZmvRFRHUCl9SzhULHeXDhVtzxySZ8sq7605BERN6CwQ0Je2KR/VVqf+7Lwrw1nnGDzb1Y/ak0vcO8R7/aLtRu87EcAMA3m9NUWhIR1RyGLgUnz1JUar9C8ONf7wAAdG9h7HJxEcUK7wPQcJPOahwoveo+VpxxIiJyCEdudFJTKpOUWCXwZucX2bRxZQ6Ly2jUF1fk5Szfk4lB767G3ow88Y4REXkwBjc6casbsY5mrzps8b1cXszqg67LcxGiEnmKba6p75Lyqo/aC1yyCy4LnAF48pskHD930Ty6RkTk7Rjc6CQsWH4jT3fyzZbqb7lQoFpXBjiYdUG1jTsFgxuOnFNto1Whv+qYsmSXQ+0vl5TZfe6fw2dw88frkZKZX91uEREZjsGNTj65Lw6dmoRjaLsIo7ti10s/7XXp+crLJVwqtn+D1YLQiioBapuGujItx16TvafyVNuIHAcA7vtiK3adzMMj/yeWyExE5M4Y3OgktnE4lk3shz7XNDC6Ky6ltFXArfM2ot2/V8gW/stwsx3IS8qUk45FaLV0vjqjRBcERtaqOq/BSjIiIqMxuCFNffZPqs1juZcqbpg703MBAGtkcnC+356ua7+MoHf1YZHjv/LLvirtBfKE3Gh6kIjIWQxudKbVNIkn+2TtMU2Oo9VIiMju4mptXBkoVCfBef2RM1XaVM8/h89gxIf/YM9JrroiIvfG4IZczhsCvqS0XE2OYzdIcjAS0Wwnc4UT3/fFVqRk5uP+BVsd6BkRkesxuCG3sirlND5cedjp2jiujJu0yBMSq2Hj/HFEXrty/2mH2uddKrHfD0lCes5Ft9orjIhqHlYoJkWOJqSKkFtqXXkvfPjKap3YxmF227iLgsslAILtPi/UXa1GXITayDd64/f9jnQH5Qone3vFAXyy9hiev6Etnhh0jcDRiIi0x5EbUtTzzZWaH/PHHSdV22Tl2xaoWyWw6aXJjea8qlMzRo9NSl0RG1bmV/13xQEXnI2ISB6DG1JUqHNdmkq7rbYGkBsc2HY8R/U4ItMhIvGPUBuBSbCT5y8qPl+91VIWdYwFXiDwsJ02h08XVDmv+qmIiIzE4IbcwrpDZ1TbVN7M/zl8Bh//7XxejiudKbDda6sqe++hauCk1cqs6mwHMfXH3QKvFrPl2DmM+PAf7DihHqwSETmDOTfkce77omK1TquIUJvnLpWUITTI/be+qHT8nPzITtUgw5UhnL0gqUhh64ZKovlZd366GQDwr/mbkDpzlHDfiIhEceSG3NLLP9tuDXHy/CWr720Dg08Fauq4MitHJAdIberKHpFJKZENOO21d9S7DubZKHXnVO4lvLU8xelrQ0Q1G4Mbg/Rr5T3bMhyqko+hp1UH1BOKcwS2D3C3yay0HJW8nGqshLJoI9ohJ1nnTVXHQwu34dN1x8yjdEREjmBwY5BFj/RE4zr2lxF7kvgP1hndBRuFRaU4ca5Q13PouTCraqxib7qn6um128hT3xBo01H1HdcB4EBWRcCcetb+z1CSJKSeLfSI3Csici0GNwZyo1XLXmfAO6sx8N012H/Kdjm22kiJq6mturpQVIq8i7aF8xy9pdsthqzVNhECx/no78PanAzAx38f
weD31uCN31M0OyYReQcGN+Rdrtxgz13ZefzvA6dtmsxdfUT1MO4WeO5IU15ZlHuxBOXlytGFHrVz9FBUKlZ+4H+JhwAAX6y33ay1Uv7lEny96TjOXlBetUZE3oXBDXmsfIVtAJRdjVxKysplW2g1muHKGGmtynJ63XcpF2kj0OizddpstAoA037YjRm/7GPuDlENw+BGZ0o3N3cbHfA0s/+2HYFZd1i9Xk6laT/uQodX/kRWnm01ZHtBj1uxChQuyhRcrBpMFJWqvye9p65EJGu0KSkArNiXBUC5WvSFolIs3pLG0R0iL8LgRmeeMRGgv++3pbvkPGcvqK+WqvT99pMoLi3H15uP2zynNNVRyaVLyjU6m56F81x5PUSDT5EPEDN+3osXf9qDez7bXM1eEZG7YHDjQs8Nb2PxvVY3LE8wLUG7CreuUJ19oRwldwMWCoqtXidyI089a5tMLZKLI9ZGGyLH+WrTCaFjifwLW7G3YnTn0OkLQsckIvfH4EZnVf+43tgpCgAQFlRRGJrTUvo7JrOUWGxvqYpGW46dw/t/HZQdKRC5Cev6M9Zq23EPlJR2XqidSo41AMBH4GeUU1iMd/88gGNnGAAReQJuv6Czqn9bm9evhQ0vXI+6IZ6zPYCnW5qUYfOYyJRGZe2Uyq0CGoYG2rT5ZWcGnhvetpo9dC2RWKukrBz+vpafe7QaZdRsxZbAYTYfE6upI1JF+vmE3Ujcfxpfrj+OlP/cINumtKwcm4/loEuzOqgdyD+tREbiyI2LNa4TjJCAKyM3BvelplpzUDzpuJLcdM7yPVladEeMiwoGAsBvu0/ZtvHAEaAfd5xUbXO5pExoT6ykExUjRZcU9tiau+Yo7v1iCx74kiuziIzG4EZnDGDEfbhSuwJv1SXyab7S3ow8fLbuGEqdXGHl7O+Idbih1e+a3Kora6K1aNzdgg3HhdqJ/DosuZI0v/2E8pRZdv5lVlUm0hmDG505cpOs6T5YeciQ8/6cbDtS4chP7caP1uPN5Sn4dmuazXO/786sRs9cw5lf0YQdttN9IuvFtdonS4TIYUQ25rxcUubQKjwlv+/OxHVvrcLzHpZgT+RpGNzo7La4JmhWLwTjeje3eY6Bj3vIyL1k85jap285+zNtNxD9TmAJvLO/ByKvsr7By67Msm4jc2TrNheL1adyTslcV3cjEkZ9uUG9LEB5uST7e2Tt/cSDACrKECg5frbQ6ZFAImJwo7vagX5Y+9wgvH5zrMOvvV8mICL34UhIsnBDKoZ/sA7ZBbYFA7cdt609Yz16IXcud5/YWJqknvMiR6jSsQvffY7AqM2vMnlKco6eUd/M9ZedGRj03hqMX7TDbpuLxaWYs/oIDp+2DaiJiMGNS9j7ZD6oTUPF1712cyyuaVhLjy55rK83i9U3cQmhJeUV/3311/04eLoA//vTdurt3T8Pqh7n52SZaSA759KD9bHlfqedCTcuKyTouguR93UkW32J+IYjZ4XO99k/FdtPrEzJttvmg8RDePfPgxj2wTqhYxLVNAxuDDRteFu8dUtHPDO0tdFd8Rgzft5ryHnlKvseE/gUbq1YYarh680nEP/BWtntIH7eKTYy4AyxPBjHjysy3bZIJlj1xMlakeuzMz1XtU1RaRn2ZqgXkBQ5FuAh24gQ6YDBjYGCA3xxT89maBQWZLcN83Lcw66TebqfY8bPe3Ho9AX8d8UBofbWU1daJb06+ysnlANk9f35i7Z91moDTq2mrly5sOknmbpMckT+Luw/lY9rX/4D7ymMDJaVS1i+J1M2oCbyZAxuiHTkTJzg7DLrlwVGteTuiSLBjFgejD70TKx1txXZIsvwAbHfq5l/pECSgI9X224wW2nx1jQ8+U0SBr67WrCHRJ6BwY0bULq5cNzGcSKbXurhr322Rf3yLpVoeo7FW9Iw5H9rhJYwA7YBR2mZ7d3cqSknx18i+zqRlVkiK85kaRS4uHLwVKTLl4rLsCVVfQNUkdGd
tVcKWqrtGM/pLfI0DG7I6/znt/2GnPdcoe0Ui8jeRnLsvezFn/bg6JlCvPFbilPHVfoUX11iU0WOE8ltKnP2QotsCOpmozvLdqlPXUmShHWH1CtxK+WAVTqSfQFtZ6xQ/HdVVi5h8ZY0rt4it8Hgxs0x5YbkKE1dXSouw96MPNlieCfOqY/4GP0r58zvvFwBRVfSLr9H/TgicdwmgX218i+XCAVAH646jLJySXFEdMm2dLz40x7V1VuXS8pYnZlcgsGNG+C/ddczKoHyJ4El3dV116ebcONH650+175T6qt1nOZMUUGBNs73WT2ScmXgInYc9TbZ+UWqbTYeEdtY9Ndd6iv1/jmsHiQdP1uItjNWYPKSnYrt9pzMQ65MojmRIxjcuDm13ZhHdmzkop54l37//dtl53LmpladKZ7KlV3fb7efq1JeLiEzT76irkgtIU8cUUzJlAuAPG9FlQitArK9GeqrBPMvl+CPveqbyC64UulZqazBlmPnMPrj9ej7tvK/z83HznGFFylicOMGqt4ovnygu0OvnTs2DrGNwzTukfcrdTpHw5JI4OLouZSKt2nlqe+S0Xvm31ixV2zvK9tl5+ojA84GQM78ZGRHd6y+/3TdMdXjZOfrd8N0twBI5EqfEfg57xccNft9j/rv2uorCc6FCqvGNh87h7s+3YxeM1cJnZdqJgY3bub6tpE48J8bcPztUQA88xOyt7A3suEoR3+ExSorVxxhL/iq3NBz3lr7N3xJkmS3iwCAOauPqp77+FnHixzK0fOfgPXlefsP9RpDzicveyCNCjyePH9RqA7T/LXqv1frD6tXer5YXIqb52zAhysPK7ZjDpD3YnDjhoL8fR1qrzZ1Rc4Z+M4a1TZarRByx6W2zyfsxnVvrsIfAp+4AdupELXNIQHXJi+LnEukzsziLc5tAaLVBxVX7qullXMCgc2Jc+rBcGlZudCKv++3pWNXei4+WGm73UmlsxeK0HbGCoz9fIvisc4UFHnENiFkicGNG/D35Y/BHYksk9Wqmq6jfzxP5FwULvimxt4n18rgZJbCp9/VB7Jx+/yNSBUcpbG+MS8XyNUQOY5WQZJI8vLfB9SnDeWCVVcOELjbYIRId9Rq7QBApkCejSRJePVX9XIQy68E7RuP2k+sPpV7CT3eXOnSHD3SBu+qbmBUpyh0a1YHTwy6xua5GTe2BwBEhStt0aBb18hFRAquna9SR0d0XyutcovseXDhNmw7fh7PqKyAsUdkw0lPYH2VKze/VHyNkxGIVoGL2wVAGk2BydWbkiPyZ/PPK4U51abUth/PERp5ItdhcOMGAv18sfTJvnj+hrY2z/Vt1QD7Xx+OF0bYPkfG+1OmKrE1kWkEkT+0JeXqn2zzL1+tiLz5WA4KLpcKHFmd2js4r3JDcWRkyvqm7+ymoXrlUogEonL1Y6x7o2dZAM0CIA+cAhN97/+3SXl6UZIkvCYwArQzPRf/mr8JA99do9huzuoj5lw30h+DGw8QEuCn+Lzan9o9r8ajbaNQ7TpEZk9+k6TaRqsbjUhulVqQATg+BVZdc1YfQdsZK/D3gdNOvV5kZZYQgSknV5K7uVr3J0dwFMIZWr11VwZAIucSaXO5pEx11FB00HPMnA2qbZLSzuPdPw9iwmL7fy/WHMxGixd+V60DRGIY3NQAoUH+CA1SDpBIP1rlxohMP/oINErLuVql+Gj2BZzSqF6IvUDh3Su7Uj+fsEfl9c7fJNcJFJETodUUr1ZJ/jN+Ud8MtVzHqUftpsDcawRIZLpWyz6fPK++8vKBBdsAqI/ofbL2KJYmKSfrb03NUa1R5G4/E60xuPEGTLpxa/+av1GT4+jxU75QpM20VXUt23UKPd5chR0n1DeEBGxvuuk56jcP7UbQtDm2yM/zyGn1nKQvNxizUazWxKZvtflX4Oob+6RvkzU5zoGsfMz84wCmfL/LbpuzF4pwxycVVcrtyb1YjO5vrMSDC7Yqnm/HiRwkp513
ur9GYnBDpDOR5F+RP7UiuR4ica7I3/XCKkHPqdxLwgnMzp570rfJOHuhCA8t3G73teXlEo6duSB8Y3JljoteRH6eX6nkjgBA3kVtd6d3lCdOgbmjG2b9o9pmosLUV6UuryfiXGExVh88g1I7q0IvFJXitnmbcMvcjZrW3nIVBjceaOGDPQAAcc3rAhD7BMhaOO5ti8BGh678CVbNRyiwM7qjtHmns5SmWP69bC+u/99a1UrDjvwhtr5ZahXEiSwp15P1qV782XZK0Lo/qwWWuAudzGju1h8X23zs6uinyAcBey2qTmvZ+7cuSRLGf70Dj3+93e2muQwPbubOnYuYmBgEBQUhLi4O//xjPzJds2YNTCaTzdeBA+pVRb3JoDYR2P7yUPw4vrf4i1TujLPu7FKtPlH1PPx/9kcsKmmVc6PVp9/ktFzz/6flXNR3w00AizZX7Pz9v7/sF2Zbsi0N1778h2LhwbyLJViyLc1iZVmlA1kFqv04lK3eRoT8lhGO34xEfi+2pqpP98lNnVjfrw5kqf+MRaYx9AiMteZet2rnVGd5/V2fbr7axs5rswuKsGJfFv7cd1qoArUrGRrcLFmyBM888wxeeuklJCcno3///hgxYgTS0tIUX3fw4EFkZmaav1q3bu2iHhunzzUNAADRV+rdNKgdaJ6maFA7QPX1an//xnRtjP6tG1Srj6SvQwL5F+6WfiWyV5Mzq6Hs/bGtTFp+QmEV2/hFO/B8wh5M/m6n3TYXikrxy84MFFwusflEKpLfI1LU0JXbOGj1a/HglaTXqqzfhVrFXwD498/7bI9j4MiW08dxs9EKa+VCIzciydXyj1//3hqHzuVKhgY377//Ph5++GE88sgjaNeuHWbNmoWmTZti3rx5iq+LiIhAo0aNzF++vo5tV+CJGoYGYte/47HmucE2z/1nTCz6XFMft8c1MaBn5Cp3fLJJtY27TT/ur7IT95mCImw/YfupfpvAqIKWK4I2XZkCXKUwDTNlyU48/d1OPKMSAC1NOom8SyU2NwiRSrq7T6rvuC1Lr5uIzK+OdbB8pkA9EJVbHWjd5SUKO9ZXkpuqrclVnq2JBFYi/2zERnfkG1Xd4JTBzRXFxcXYsWMH4uPjLR6Pj4/Hxo3Kq0u6du2KqKgoDBkyBKtXr1ZsW1RUhPz8fIsvTxUe4o8AP9sfWVR4MBY/2gvD2kfafa3IJ3qRhFVybxuPqm8qKEKrXwWRabKqCopK8Y/MxohKgYg91flb+9f+06rnff7H3Zjy/S48+c0Osf44MV4gMk0mv3pLn0/sev6JsD7XnVWmReyRW+7syluskfdzkcBFaORGswBIvY0rGRbcnD17FmVlZYiMtLwhR0ZGIitLvuprVFQUPv30UyQkJGDp0qVo06YNhgwZgnXr1tk9z8yZMxEeHm7+atq0qabvg8idKC0RNYKjwY09BVXyYyRI2C+T3+NMccLq/EH+/Upez4Yj9pPBL5eUIXH/aeEl98705/i5i6ptvPVzyy1z1QvopWSqf6DNkhlpM3JlllBwqlXg4kSelxxXTrOKMDyh2Hq0QJIkuyMIbdq0waOPPopu3bqhd+/emDt3LkaNGoX33nvP7vGnT5+OvLw881d6uvpwqDcYcG1Di+9FpivUWux+NR59rqlfjV6RO1jh5GaVztDjplpSJiEj1zbv5Zed7rfU+7Vf9+HRr7ZjgkIOUP7lEizckCqUn+Ss0/nq00lyW3VotTGsXkrK1E9+88e2AZB1n0WmfNNkgki93rrINRWachLooUhQ4u65RXIMC24aNGgAX19fm1Ga7Oxsm9EcJb169cLhw/Z3LQ4MDERYWJjFV00w+64uuKlzNL566DrNjhkW5I/agax07OmUdvmuVFjkuqrKWrGuOrtsl+2eVM58uqzOn/Vvt1Z8mFors9dUpelL9+DVX/fjrs/sT8OcLyzG+4mHcOJcoaGreJzP6dKm184cpdhOHZeqqlbtNp/L6mSjP7ZfFK+S
3AiQNZEgVqtEYLEASKM2bhb/GBbcBAQEIC4uDomJiRaPJyYmok+fPsLHSU5ORlRUlNbd80jtoq4GbnVCAjD77q7mEZyRndSvkVhejtPdIw/y2FfqS9NFuFuC82+7LQOeH3Yol7EHgOUKy8rNqvGHfeWV/B6lGjvPJ+zG7FWHFavOAjAXZNPrZiQSKDhLq2kgvW6yeZdsSwdYj2iI7DN1k8xIks1xBfojNrrjwpwbN1s8b+i01JQpU/D555/jyy+/REpKCiZPnoy0tDSMHz8eQMWU0rhx48ztZ82ahZ9//hmHDx/Gvn37MH36dCQkJGDixIlGvQW30rReCJZN7It/ptmuqBp7XTP830PXYfbdXe2+XovbUPP6IRochYxmr3BfVSJ/8HzcLGAWWe1jnR+jVf2O6tx0tx6vWFGmtMv75mPn0HbGCvzfxuN226Rk5qPff//GT8nqQZ070mx6RKf7cJbAqIxIm9mr1EdXFyr8nCttPaa+ElGkBlJ1losbxdDg5s4778SsWbPw+uuvo0uXLli3bh2WL1+O5s2bAwAyMzMtat4UFxdj6tSp6NSpE/r374/169fj999/x6233mrUW3A7nZrUQdN6tgGGj48JA69tiDrB/tU6vton8b8mD8AogVEi8nwvy1S9tabVCjxXBkCLt6hvZ2Bdsv4TlarJoqpzf5i8ZCdKyyW8ssy2hkzVNifPX8LkJfYTz/Mvl+C/Kw4gJTNfs1VX3kqvd/7R30dU27z9h3rx2kcERmAfFWiTIbDxp7v9FhieQPHkk0/iySeflH1u4cKFFt9PmzYN06ZNc0GvvJfSTUKLG5Gfjw/8RD6uk8c7KrBVQe5F96paKkIkSfX77eojHyUCUziuTtQU2Zrijd/24/vtJzFvzVF8el+c3XZZeZcRERoodF53+1TviSMRRrpl7kYcf3uUYptLMvWNjGT4aikyzvM3tAUAh4IRobwcledbRdQWPh95tse+FqsB4ypa3bAy89Q/yX631bLS+tebbUeE/tzn+Mo1ofdQjfe5N0N96XTi/tPoNXMVJn5rfxVYTmExnvxmB9YcVK5RVLmEX7sZJ9dFJQyArhJJuHYlBjc1TKfGdQAAoYF+eGxAS8y+uyvWXcnREVnmrRbciIRJvz3VD3d2Z70hqqD3nlR6ELmpnbBaOpxTaDuKZV2w8GOZ6Qi5mj5aqM59ef7aowCA5XvsB2dvLU/B8j1ZeGDBNrt/N/aczEPbGSsw4+e9do+TlXcZQ99fi4UbUhX7nHbuoqZ7VmmX4FwzIiDWuSFDhYf4Y9e/47Ht5aHw9THhps7RiK4TDAC4v08LvPOvTnj/js52Xy9UL0dgeMeHv3l0hUh+gQitVmYZuSJQrtjf5+st83nkcmrkNgHVQnVuV1WXRdu7v89aWbEJqtyoVqV3/jyAI9kX8Oqv++222XzsHAa8uxq3zLFf3V6SJPy66xROnNNm53egZucbuTveYmqg8BB/BPnb7sfl7+uDO7o3RTOZhGRRJpP66I3IzcPfl3k7dNUOmT2pnKHZp3E3u6l9kGi5U3pyuu312n0y1+L7snLb/JsCJ4IkvQcmigTyhH68sqR/v0I14mW7TuGpb5Mx8N01dvssSRJWpZxGes5F7abJ3OtXpcZgcEM2fKuREKzV6pg/nh7AqSsyW7Q5TbVNlkAejCu5cgTotNXy4srigVVZF2+UWzH1zoqDqucSCTQ3W2166Q77PW0/rt7vVSnZePj/tqP/O/b3LCwrl/C/vw5i/eGzioFL3qUSl03VpOdc1G30zlMxuCEbnZvUwcBrG+Lu65rirVs6Iq55XXxyZdVE31YN1A+gmpdjUm0U6OeD4ADv3+2dtDNbYHor9ax2UxKeRuQzy+Fsy406f99tW8Dwjd/tTw9VelVhOXqlQ6fVNwW1pvcoSGU9IcB+QLY06SQ++vsI7v1ii93jpOdcROfX/sKtczfYPU5xaTnGf70DCzekKvbpYFYBzl2wX5/p5PmL6P/O
anR+7S/F42w+dg5Hsi8otiktK7ebIySSO+TM/m56MXwpOLkfHx8T/q/Ktg339Gxm/v87ezRFaJAfAv18nF4JYzKx0jEZQ25Ew9rh08o3AEC7m6wr/x04M6oqt4WFtfetpsREPfPdTkSFBym2kdv125r1TVfvsZJ0gZovv10JCnedtN//ZbtOYcW+LKzYl4UH+sbItjmSXYDhsyo2hra3FLtyRErpd/LomQu468ou6/aOc7mkDH3e/hutI2pjyeO9bZ4vLitHoJ/yB86iknLZlAcjcOSGHOLrY8LoKknIclxVcv+/t3XEHd2buORcVHP8lKy+AecKJ5ZwO0ts09vqb4zrrJ3puaptbp9vuzHlxWLL5Gm5zSuftNlw1PYOLlJAcd8pyyBDbHsDrbYukG90QWAaaZvAVJpI/pdIwL4lNQc5hcXYolCxWJIkPPVtMv67Qr2AoNEY3JBTQoPsD/o1Clcu7KU+KVXxR0Ptg2atQD/UqyVWRIxIS0r7QFVSWiZdlcj+P2pEbnA+AiM3Ru4FdlGmCJz10u5hH6yzaTN3teV0pFyhwgcXbLP4/pLM9MnGI2dtHrMmsjGmSKFET7XrZB5+3XUK89YcNborqhjckFOa16+F529oi1dHt8dDfWPQITrMPJX1xKBWGNMl2lwk0JpYDTJJ9Q+tSADUv3UDNKjNAIhcLyNXoGS9JFYRWQueWH7BOlCQiwOtk3YHv7fGpk3uRctRkr5v/23T5p7PLXNo5H4qj3y1zaqNbatXlu21aqMvLQJS0Vo8nhS4MeeGnPbEoGtkH68d6IdZd3XFkewC2eFLPx+TalAitgut+gjQsPaR6N68Hj5Y6VxeAJGevtmivgrsYnGpzc3Zmlb1p0SIHEXkXCK3U5HFRoU6lf2/fZ7tNNnejHxc3zbS/P2pXNuRnG+3ptv9YFfpiFXitlxwsUchX4fUeWAsT57CXl6OyWRSraUjMlQvOieu9nf21q6NMSK2keqxiIzwx1716a3S8nJkqkyZiExLeatigX2+rB0UWM019P21qm02HT1n85h1DtKqFNstKqy3M9glk9u0cv9p1fP/slM9h+zoGfWcHCFu9CvG4IZ0ExLgh60vDsHySf0R4Ffxq1Y52nN/nxZ4qG8MFj3cE6+Obm/z2nJJ0mQliSRJqv/emtUPQddmdap/MiKD/LlP/SZXWFSKYwJL4bML7C87doRaHpDI6Kw3xGPjvtxq89h5q5E4kd27b56zweYx69e98ovtNhZPf7fT4nu56dJ4mVwma9n56r8XeSojjK7EaSnSVURYECLCgnDojREVgcaVv1aBfr7495Wgpl/rBjh4usBimW5JmaS4IgsQG5URWxWhPqx/a9fGiAwP8ohEOiI5fx9Q3sASqLjxnVWoqQJU5K+oBUmnci8hRaFaMOBAlWdW+BX2f5vsb2NRSS7fyHr6b7HMdOnNczZg/r1Xd4mXq89z67yN2P7yUIGe6o/BDbmM0jx8aJC/xffFpeW4r1dzpJ69gB4t6tl8+gAqtosQyzVQfl4SaFM7yE812CLydGk5F1XbiIz+5F0S+wS/SiDg0nIzTBLz4k97bB6z3vhVbq8vtcDYlTgtRW5hwqBWiGte1/x9rUBfBPj54I0xHXFzl8Z4/47OCLNafl4nxN/6MDYqRnc0yF4W0LJhLQxtF6nekIiQnqO+miz3YglWyuSjWKvcW4qoEoMbcgvhIf5IeKIP3ru9M54b3gatIkItnr+1WxPcFmdZsE+SKiomA0A/O9tC1Faox2M+jkD/KqaulLWLCsMNTEwmcktKO49X+lWgInNOofuMTgDut4mru+C0FLmVf8XZrzjcKqK2zWNN64Ug5fUbEOTvg7eWp+CzfyzngeuFBKCwqNTmdVWJLTsXSHAWCID6XFMfY3s2x4TF1pVXichoaivOALFNXC8UuU9ibU3FkRvyGHd2b4opw661eTw4wBcmkwkvjWqPJ2Vq79x9XTP0b90AL49qJ3vc5vVDVKeu
RJKOtVoG+fKodphxo+0KMiLyDHNWqy88WLDxuGqb6Uttc1+spWSqL1l31e7k7oTBDXkMP18fTBrSWrGN9aZt0pXHvn64Jx7p3xIrpwy0qWkT5O+Llg1qKR43WGQzOJHVWwJtagf6qfYntnGYUM4REbmnE+fUk7dFfCqwt9b0pbtV2+ReLFZtI0KSJBzMKkCpE7WFtMTghjyW3Fzzg31boHOTcLuvaRVRG+2jwqyOAwxq0xCvjm6PH8b3xqKHe6JWgGUwc22jUDSoHaDYH6HCgwLz4yKfsW7r1gRz7umm2i5aZddlIvJ++ZeVp+YB23o4cuQ2N7V267yNGD5rHSZ9lyzSNd0wuCGPJRdLhAb545eJ/Rw+lslkwgN9Y9CjRT30a90Azwy1nf4a2i4SD/WNwYd3dZE9RrdmdYWKjqk1KRfIXhbJEwr298VHAgEQEZFWktNyAYhvHKsXBjfkcUZ1jELriNro1bK+U6+/pVtj1TZyQYqPjwn/Ht0eN3dpjMWP9kQ7qxGgQH8f9GvVEADsbi9xXYt6LttXSyQJesLga7BsYl/1ExIReRAGN+Rx5ozthr8mDzBv6eCoJnVDsO+14YptQgKsFhJaBRx9rmmAu69ratlEAhqGBmLXK/FY9exAtG1kuZwdAEIC/WRXfVmfSjVwERzdUT9ORTFEJW0bhXLvLSLyKAxuyCNVd4fjWoHKVRBui2uMgdc2VO6DncfDg/3h7+uD3yf1l12h1SoiFF8/fB3+fGYAroupZ3uAKttU2CO0ekuASH5Pjxb1VHc5BpjfQ0Tug8ENeTWRncPlBPr54v8euk6xTQuVFU2+PiaEBVuuaKoMR/q3bog2jUKx5LFeeGNMrEWbcgno3bI+WkXUxqiOUbirh+UIkahG4UFCQaD69hTq01tjukTjpwnK01udmoTj/Ts6q/aHiKi6GNxQjedsANSvVQO8fnMHxTbWMYH1mUwmE8KtAiBJkhDg54PEyQMwZ2w3vH1bJ3x0d1er40ho0eBqXs87t3VCg9qBFm0Gt4lAoMrUncgIkFY5QA1qB2KIyvYU4cH+mDi4lfoJiYgUMLghrzS0XSSiwoPQv7Xy1FJ1mEwmjOvdQrFN72vUk56tR0Uk8+NXn7Cus1MuAVHhwUh4ojcSJw/AHT2a4n8yoyJtG4Xi9rgmmDi4FZY+2QejO0dbnUuCn+/V89zfu7nNMSoCF+XQRaRGWFm5+ghQ64jamHi9cnDTOqI2Ul6/Qf2ERFRjMbghr/TZuDisf/56BAcIFN/TUZO6Idj4wvWKbSLDLHNV5EZKbAKgK23imtdD68iKxGV/H9vIwWQy4d3bO2Pq8Dbo1qyubfAiAS0b1MKoTlEY27MZXrs5Fr89ZbmUXpKAOrWuji4teLAHbuhgmWBcXm6ZvXyrzIq0cklSHd0pkyT4qERAZZKkOiLl72tSnVYEgCFtI1TbEJHnYXBDXslkMsFX5mbvrOpsThddJ1jx+e7N6+K54W2qnMtWVLjlMZztj3UOjnTlsTn3dMObt3S80sbyNZIkISyoYmPTXyb0xeA2EXjuhjYWbcrKJdSukqT95piO+GfaYJs2ajlAIqM75QJtagX6qSaEA8AXD/RQbTM13rbmERG5NwY3VOOpjRTozWQyYUKVPBO5HKD20WF4+9aOVdrYHkdkB3S5wMWmjdX4SuW+NHHN66Jz0zpX2lgqkySEBPhh0cM9sejhnggO8IWPj+1xQvx9Ufnwxheut8klKi2zHN3pKbOarExgNVlZmXZ76Uy8XnnLDwBY9HBP1TZ3X9dMi+4QkQAGN1Rj3derOdpHhWFYe+UkV0CbZdei7CXw3qVyc+zYOByP9o9RbBNgVdNGLkiwfqhqTo6915VfCYD6tW6Afq0bVLSxek25JMHHx4R9r92Afa8NR3SdYPRr1cCiTVm5ZDHitvDB67Dr3/GWbawCl4UP9sDMKoEfUBEAuVLle1Zi3Uc5/QWOY69AJBFdxeCGaqz/jInF8qf722y2
KUdoTyiNbqjOnqtyZ3QlHaLDcEvXq/kwcsexTl7287H9M2EduIQKjBpVjgAFB/jarTNUUl4Ok8mE3a/GI2nGMAQH+CLQ3/L8lcnLL45si8cHtMSgNhH4V1wT2XON6hSFTk3CsePlofjmEfXRFaN9LTACtM5quk+O2sarRN6OwQ2RmxGJkURWJ8kxmUz44M4uisdp0aAWHul3dQRILtiyTtT2k6lybD0CJBdE2kuUDgvyR71a8huVVo7KPDbgGkwfWVEk0XpqsTK4mXNPN/wyoS/q1w5E31byoyJv3hKLxwe2xIYXrse7/+ok26ayQOG8sd1wc5do2TaVrEfHjPD31EGqbba9NFS1zX9vUx9tuq+X7Qo7ay0bMtgi1zL+XyGRG6ucAhh4retW1YjELVrNutg7zss3tldsExkWhGeHXavcJjQIsY2v7r9VN8Q2WLHeQkNkxKFMJiKzzh0vrdJGLT9nbM/mmD6iHRrXCcbt3eULJq55bjD2vBqPER2j8OFdXWXbzLy1I+7t1Qz7Xh+OVc8OlG3T+Epy+f9u74zBbZQTnge1aej0FiMiGoYGqra5s4d6ntB/rIpQyvn72UGqbaaPUK+C3aNFXdU2RACDGyJFq54diJ3/HoZGGm0tIFQQT6honjbTZCLHsTdK9NSQq4m2cufy8TFh2YSry8rlzhUS4Id3qoyWyJ3Keu+ra2RGAawDGK2nZQL8fBAa5K/Y5u7rmuGNMR3h7+uDaxrK7x+2euogJM0YhtvimmDBg/JL1Rc+2ANThl2LBQ/0sFvPZ8qwa9G5STiWPtkH9/byjkTlxwdeo9rmh/F9VNt8el+cahu10TcAmHGj8hQvuTcGN0QK/H19UEdmxEFPYoGLRucSOo5IkCSv6oope+e6o8poiVyQ5Otjwq8TrwZJIqMZdUKUAxEAshub6i3Az8fudFulQW0iMGlIa8VyBpOGtMYvE/uhW7O6eGOM/NTRuucG44v7u2Pva8Px+bjusm0WPNgDD/ZtgaQZw/DhXV1k2zS6Uodp5q0d7U5BVa4Ee6BPC7vvy7oSt17iO6hv8mpv9K2qh/spJ+cDwDSrkghyZPePszJpiPqKvC5XVioq6dQkXLVNZ4E2jcI8f584BjdE1RR0JeFVZMWMCNHtDtRUd3NR87mERpu0CcjsNelY5Q+yveNU3end3nHeue3qKJHI9WleX31lUmSY+vSOEZrVD8GQdpGoHeiHoXZWBA5uE4FXRndAvVoBuLmLbeFFAFj57ECseKY/7r6umd0pqJm3dsTBN27Aqzd1wO5X42Xb/P3sQCx6uCeSZgyzOyry21P9MHnotdj20lC7q8smD70WAX4+ePOWWDSpK19Dqu6V4HbgtQ0t6i9p7clB6luFfP94b9U2U4ap11L6WWXvNgBYNrGfaptfBNpsmq5ceBSAUB0pIzG4IaqmbS8NxZqpg9AqQpuRAKFVV0LBhFZtBLqj3kSzESl7baquwLI7StRDeZQIAJJnDDP/v70aSD89eXV6xF6ZgKo7qdsbtQit0mc982uqo3agH9o2ClNtF+hXkTAeZmf6rn7tQPRr3QD1agXYHRWJbRyOp4e2RsPQQLt1gZ4e2hr7XxuOsT2bY/3z8jfhFc8MwId3dcHn93fH7lfkg61lE/tiXO/mWDN1kN39zN66pSPqhvhjyWO98B87+8h1bFwReN/StTF6t5TfbqVVRMU05Y2dovBQX/n3HuTvPj9/kcBfpAK4kdznahJ5qNAgf9Udwh2h52opZ45Tnbwci+O4MkgSOI49datMG9kLgLo2u5rYaq8/Twy6mkNSbuc4W14acvUbO50OE1hmLzLV4E3kVudVFRkWhJu7NIa/r49NMclKnZrUwes3x6JFg1qYOlx+eumens2QNGMYerasj/vs7CO36JGe+Hxcd/z3tk749rFesm2+e6wX3vlXJ7z7r87492j7o1YP9GmBrS8OwZ/PDJBtUzma9fE9Xe1u2vvvK6NiM25sj9u6NZFt
88l9cQgN9JPdJqXSiNiK6T09R770xOCGyIVENtLU6gYvQrvRFK0CINf1R6sgUihvyU6bkIAqo012Gm2aPkT28aqWPqk+ZXGnnZVgVXUWyOuoadRGMcKD/TG0faTiyFuD2oG4o3tTxb3uWkWE4tWbOiAiLAht7OSD3X1dMxx6YwRu7BRtd9Peh/rFYM+r8Xi4X4zsZroAMLxDI+x6JR7/u70zvrMTkL17e2d8eFcXbJp+PdbYKS3wxpVpyinDrsUklQ1vXY3BDZELbHtpKH6Z0FcoKdDep/yqXJlQXI17t8OtNFtNptWKM61GrdSb2D2OvYKHVYnso/ZfOzV8qkoYr54fIrIaSWRDUpEl5N6yEkxLItOXaiv7gIpkf5PJhF52ptJqB/rh5i6NFUem7+3VHMkzhmHSkNaYEq+eXO1KDG6IXKBhaKDwp2J78/ZVabW5gGY1ddws4HDlVJpY0Oa6CtfVoTbdA4itRvrMzuqsqu7r1Vy1svUbYzqqTsstFqg8PeeebqptRAKpO7rLT/NUNUZgmbnIVKOIuObG1/2pq7L6zygMbojcxNYXh+D7x3ujp0BwIzK6I8K1AYdG/dGsjQtHktSbaBawugMfH5NQgqzIprVVp+7k9GnVAC1UVrWN6hSluvT/PzfHytZQquq/t3VC6wj5GkaV3r+jizmB2J7fnuqv+DwAzB2rHpB9eX8P1FKY6gKA+feqH+exAS1V24iounVLoMEJ8gxuiNxERFiQak2Mrs3qAABu7ar+CVKMK6eKtMqDceEKL6FcIm3auMHAjaZENpsVmE2z2aJD/jgi51JuYzKZVGtamUwmRKgs/ffxMakGW83qh+DGTlGKbUZ2jFJM+AWA8BB/jOmq3OaG2CjVmj3TR7TFyI7KI3L2cnPstWlpp5ClqzC4IfIgPzzeGzteHmo34bAqtU+PAFBern5Ot1suLnQcbRppdS5vC1y0CkpElhwLVWvSqj9CpxJrpdpCg4Cs4jjqvVHLyTKZTGilEoz0allftWhh56Z1zBvUGj3N6plrvIhqKD9fH9SvrfzJcemTfbDxyFm7dUKqEtnNW6ugRCTHRbMl5S4cTdEqsPMkJpNJ9Y0L3ZhFz6XBcbQY3anoj8i5tGmj1fsSCxBFjiN+fbSaOncWgxsiL9OtWV10q1KHRc57t3fG0qSTmCiwfNOV01Kajaa4tPCgZyQLa0mbsQvR0R2RNtoEJSKd1i4A0uY4WgWIWgVkle9Lq1pczmJwQ1QD/SuuCf4VJ5a3EyNQoFCrP2RC9Xs02hBUszo36k28cFrKBLV3rlVQIjbiInAu9SaaBWSajVppNL0lFiRpG7QZPXLDnBsikvXj+N54fEBLi0q71irr9tzZQ71AnEtXHmm1CozTUvJcmE+jVTAhNO3iwqkioVEigTu0VsGfI6MyIscxOqDnyA0Ryereoh66t1BevbXk8V44ef4SrlFIRpw4uBXmrDmCF0a0tdsmwNcHxWXl6CNQwVmkYJ12q5M4LSXHpVNOAucSodXNW2hDWo1GSsQSk7U5l0b77JqvD0duiMhjBfr5KgY2ADB1eBscemMEYhvb3/9o1bMD8dYtHTFBIQfonds6oXGdYIudva3dfmWq7Zmh6rssu3RaSqCNu3DV0mst22g3vaXVKIgrR1O0aaP1FCGDGyLyev4qlW+b1gvBPT2bmXeWlnNHj6bY8ML1aB1pfxn8O//qhC0vDsHozvarxL48qh0A4N3b7QdJz13ZSLFyo0I5lVNyozvZP9fd11VM1z09pLXqca4X2LLAFVya6CrQRmwURKM2GnXIpbk7Gg2RaR3YiZSZ0BOnpYjIa5hMJkSGBSm2eaR/S9zfp4ViwDVhcCs82LeFYnXchQ/2wNpDZxDf3n7xszfGdMT9fVqgjUJA9sX93bF8TyZu6my/GFu3ZnWQlJaL6xSmCaPDg3Aq7zLqhtjfV2jhgz0wcXEy/qsw+nVzl2gsTcpAtysFI+VoNy2l0U1Xo9wUrVYV
uXS1lCtHgAQaVb4vo6diGdwQUY2jNpIEqJf9rxMSgJu7KFeH9fUxoW2jMMU29WsH4j47OzxX+mxcd/yUnIFbFKrRLnqkJ2avOowJg+1P7Q1qE4Hdr8Qr3qTeGBOL/q0bYHAb+yNJLRrUwr5T+Yp97hlTH7/vyUSowsaf7aLCkJF7SfE4g9s0xJHsC6insIdR1bL/9sR3iMTW4zmIVKgwPLhNQ6w7dEaxz7UFNjIVWWEYoRKEA2IBUOM6waptwgQ20hTJZQsQ+Hdztc6NalNdMbghInJz9WsH4pH+yvv/tGxYG7Pu6qp6LLVP3yEBfrhFZXuPeWPjMPOPFMU9id66pSNaR9bGGIUA8O3bOmL2qiDF1XbPxrdBy4a1MahNQ4XjdMLDC7fhSYXA7oE+LdC0XojiZpPjerdAZFiQYpvnR7TFoewC3Nuzud024wdeg4LLpRjWPlKhTUucOFeIEbH2t2EYcG0DLNx4XHFU5e7rmuHYmUL0a9XAbpt7ejbDP4fPKE593tmjKRZtPoEh7ez3eUzXxli8NQ19Fc7lLkX8TJLBY0dz587Fu+++i8zMTHTo0AGzZs1C//72NxVbu3YtpkyZgn379iE6OhrTpk3D+PHjhc+Xn5+P8PBw5OXlISxM+RMVERGRUSRJwqaj59AqorbQSI8W5xPK4VGQnX8Z/7fpOGoH+iuWkXCGI/dvQxOKlyxZgmeeeQYvvfQSkpOT0b9/f4wYMQJpaWmy7VNTUzFy5Ej0798fycnJePHFFzFp0iQkJCS4uOdERET6MplM6NOqgUsCm8rzVVdEWBCeG95W88DGUYaO3PTs2RPdunXDvHnzzI+1a9cOY8aMwcyZM23aP//881i2bBlSUlLMj40fPx67du3Cpk2bhM7JkRsiIiLP4xEjN8XFxdixYwfi4+MtHo+Pj8fGjRtlX7Np0yab9sOHD8f27dtRUlIi+5qioiLk5+dbfBEREZH3Miy4OXv2LMrKyhAZaZm8FBkZiaysLNnXZGVlybYvLS3F2bNnZV8zc+ZMhIeHm7+aNlUvE09ERESey/AiftZzfGoJTXLt5R6vNH36dOTl5Zm/0tPTq9ljIiIicmeGLQVv0KABfH19bUZpsrOzbUZnKjVq1Ei2vZ+fH+rXl9+TJjAwEIGB9usaEBERkXcxbOQmICAAcXFxSExMtHg8MTERffr0kX1N7969bdr/9ddf6N69O/z91YsUERERkfczdFpqypQp+Pzzz/Hll18iJSUFkydPRlpamrluzfTp0zFu3Dhz+/Hjx+PEiROYMmUKUlJS8OWXX+KLL77A1KlTjXoLRERE5GYMrVB855134ty5c3j99deRmZmJ2NhYLF++HM2bV1R+zMzMtKh5ExMTg+XLl2Py5MmYM2cOoqOjMXv2bNx2221GvQUiIiJyM4ZXKHY11rkhIiLyPB5R54aIiIhIDwxuiIiIyKswuCEiIiKvwuCGiIiIvIqhq6WMUJk/zT2miIiIPEflfVtkHVSNC24KCgoAgHtMEREReaCCggKEh4crtqlxS8HLy8tx6tQphIaGKu5h5Yz8/Hw0bdoU6enpXGauIV5XffC66oPXVR+8rtrztGsqSRIKCgoQHR0NHx/lrJoaN3Lj4+ODJk2a6HqOsLAwj/hF8TS8rvrgddUHr6s+eF2150nXVG3EphITiomIiMirMLghIiIir8LgRkOBgYF45ZVXEBgYaHRXvAqvqz54XfXB66oPXlftefM1rXEJxUREROTdOHJDREREXoXBDREREXkVBjdERETkVRjcEBERkVdhcKORuXPnIiYmBkFBQYiLi8M///xjdJfcxsyZM9GjRw+EhoYiIiICY8aMwcGDBy3aSJKEV199FdHR0QgODsagQYOwb98+izZFRUV46qmn0KBBA9SqVQs33XQTTp48adHm/PnzuO+++xAeHo7w8HDcd999yM3N1fstuoWZM2fCZDLhmWeeMT/G6+qcjIwM3Hvvvahfvz5CQkLQpUsX
7Nixw/w8r6vjSktL8fLLLyMmJgbBwcFo2bIlXn/9dZSXl5vb8LqqW7duHUaPHo3o6GiYTCb8/PPPFs+78hqmpaVh9OjRqFWrFho0aIBJkyahuLhYj7ftOImq7bvvvpP8/f2lzz77TNq/f7/09NNPS7Vq1ZJOnDhhdNfcwvDhw6UFCxZIe/fulXbu3CmNGjVKatasmXThwgVzm7ffflsKDQ2VEhISpD179kh33nmnFBUVJeXn55vbjB8/XmrcuLGUmJgoJSUlSYMHD5Y6d+4slZaWmtvccMMNUmxsrLRx40Zp48aNUmxsrHTjjTe69P0aYevWrVKLFi2kTp06SU8//bT5cV5Xx+Xk5EjNmzeXHnjgAWnLli1SamqqtHLlSunIkSPmNryujnvjjTek+vXrS7/99puUmpoq/fDDD1Lt2rWlWbNmmdvwuqpbvny59NJLL0kJCQkSAOmnn36yeN5V17C0tFSKjY2VBg8eLCUlJUmJiYlSdHS0NHHiRN2vgQgGNxq47rrrpPHjx1s81rZtW+mFF14wqEfuLTs7WwIgrV27VpIkSSovL5caNWokvf322+Y2ly9flsLDw6X58+dLkiRJubm5kr+/v/Tdd9+Z22RkZEg+Pj7SihUrJEmSpP3790sApM2bN5vbbNq0SQIgHThwwBVvzRAFBQVS69atpcTERGngwIHm4IbX1TnPP/+81K9fP7vP87o6Z9SoUdJDDz1k8ditt94q3XvvvZIk8bo6wzq4ceU1XL58ueTj4yNlZGSY23z77bdSYGCglJeXp8v7dQSnpaqpuLgYO3bsQHx8vMXj8fHx2Lhxo0G9cm95eXkAgHr16gEAUlNTkZWVZXENAwMDMXDgQPM13LFjB0pKSizaREdHIzY21txm06ZNCA8PR8+ePc1tevXqhfDwcK/+WUyYMAGjRo3C0KFDLR7ndXXOsmXL0L17d9x+++2IiIhA165d8dlnn5mf53V1Tr9+/bBq1SocOnQIALBr1y6sX78eI0eOBMDrqgVXXsNNmzYhNjYW0dHR5jbDhw9HUVGRxRSuUWrcxplaO3v2LMrKyhAZGWnxeGRkJLKysgzqlfuSJAlTpkxBv379EBsbCwDm6yR3DU+cOGFuExAQgLp169q0qXx9VlYWIiIibM4ZERHhtT+L7777DklJSdi2bZvNc7yuzjl27BjmzZuHKVOm4MUXX8TWrVsxadIkBAYGYty4cbyuTnr++eeRl5eHtm3bwtfXF2VlZXjzzTdx9913A+DvqxZceQ2zsrJszlO3bl0EBAS4xXVmcKMRk8lk8b0kSTaPETBx4kTs3r0b69evt3nOmWto3Uauvbf+LNLT0/H000/jr7/+QlBQkN12vK6OKS8vR/fu3fHWW28BALp27Yp9+/Zh3rx5GDdunLkdr6tjlixZgkWLFmHx4sXo0KEDdu7ciWeeeQbR0dG4//77ze14XavPVdfQna8zp6WqqUGDBvD19bWJVLOzs22i2pruqaeewrJly7B69Wo0adLE/HijRo0AQPEaNmrUCMXFxTh//rxim9OnT9uc98yZM175s9ixYweys7MRFxcHPz8/+Pn5Ye3atZg9ezb8/PzM75nX1TFRUVFo3769xWPt2rVDWloaAP6+Ouu5557DCy+8gLvuugsdO3bEfffdh8mTJ2PmzJkAeF214Mpr2KhRI5vznD9/HiUlJW5xnRncVFNAQADi4uKQmJho8XhiYiL69OljUK/ciyRJmDhxIpYuXYq///4bMTExFs/HxMSgUaNGFtewuLgYa9euNV/DuLg4+Pv7W7TJzMzE3r17zW169+6NvLw8bN261dxmy5YtyMvL88qfxZAhQ7Bnzx7s3LnT/NW9e3eMHTsWO3fuRMuWLXldndC3b1+bUgWHDh1C8+bNAfD31VkXL16Ej4/lLcfX19e8FJzXtfpceQ179+6NvXv3IjMz09zmr7/+QmBgIOLi4nR9n0JcnMDslSqXgn/xxRfS/v37pWeeeUaq
VauWdPz4caO75haeeOIJKTw8XFqzZo2UmZlp/rp48aK5zdtvvy2Fh4dLS5culfbs2SPdfffdsssXmzRpIq1cuVJKSkqSrr/+etnli506dZI2bdokbdq0SerYsaPXLAEVUXW1lCTxujpj69atkp+fn/Tmm29Khw8flr755hspJCREWrRokbkNr6vj7r//fqlx48bmpeBLly6VGjRoIE2bNs3chtdVXUFBgZScnCwlJydLAKT3339fSk5ONpcecdU1rFwKPmTIECkpKUlauXKl1KRJEy4F9zZz5syRmjdvLgUEBEjdunUzL3OmiuWKcl8LFiwwtykvL5deeeUVqVGjRlJgYKA0YMAAac+ePRbHuXTpkjRx4kSpXr16UnBwsHTjjTdKaWlpFm3OnTsnjR07VgoNDZVCQ0OlsWPHSufPn3fBu3QP1sENr6tzfv31Vyk2NlYKDAyU2rZtK3366acWz/O6Oi4/P196+umnpWbNmklBQUFSy5YtpZdeekkqKioyt+F1Vbd69WrZv6f333+/JEmuvYYnTpyQRo0aJQUHB0v16tWTJk6cKF2+fFnPty/MJEmSZMyYEREREZH2mHNDREREXoXBDREREXkVBjdERETkVRjcEBERkVdhcENERERehcENEREReRUGN0RERORVGNwQERGRV2FwQ0RERF6FwQ0RERF5FQY3RERE5FUY3BAREZFX+X/rU9yigdoIBgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(loss_history)\n",
    "plt.ylabel('train loss')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3193f8f6",
   "metadata": {},
   "source": [
    "###  模型保存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "de9b018b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 保存模型\n",
    "torch.save(model.state_dict(), 'seq2seq_params.pt')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "21d90ee6",
   "metadata": {},
   "source": [
    "## 模型评估"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "89910aec",
   "metadata": {},
   "source": [
    "### bleu指标"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "3e45e46f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "# 计算bleu分数\n",
    "def bleu(label, pred, n):\n",
    "    score = math.exp(min(0, 1 - len(label) / len(pred)))\n",
    "    for k in range(1, n + 1):\n",
    "        num_matches = 0\n",
    "        hashtable = Counter([' '.join(label[i:i + k]) for i in range(len(label) - k + 1)])\n",
    "        for i in range(len(pred) - k + 1):\n",
    "            ngram = ' '.join(pred[i:i + k])\n",
    "            if ngram in hashtable and hashtable[ngram] > 0:\n",
    "                num_matches += 1\n",
    "                hashtable[ngram] -= 1\n",
    "        score *= pow(num_matches / (len(pred) - k + 1), pow(0.5, k))\n",
    "    return score"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d0e50733",
   "metadata": {},
   "source": [
    "### 测试集评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "22f49b48",
   "metadata": {},
   "outputs": [],
   "source": [
    "model.eval()\n",
    "translation_results = []\n",
    "bleu_scores = []\n",
    "# 因为batch_size是1，所以每次取出来的都是单个句子\n",
    "for src_seq, tgt_seq in test_loader:\n",
    "    encoder_inputs = src_seq\n",
    "    hidden = model.encoder(encoder_inputs.to(device))\n",
    "    pred_seq = [tgt_vocab['<bos>']]\n",
    "    for _ in range(8):\n",
    "        # 一步步输出，decoder的输入的形状为(batch_size, seq_len)=(1,1)\n",
    "        decoder_inputs = torch.tensor(pred_seq[-1]).reshape(1, 1).to(device)\n",
    "        # pred形状为 (seq_len, batch_size, vocab_size) = (1, 1, vocab_size)\n",
    "        pred, hidden = model.decoder(decoder_inputs, hidden)\n",
    "        next_token_index = pred.squeeze().argmax().item()\n",
    "        if next_token_index == tgt_vocab['<eos>']:\n",
    "            break\n",
    "        pred_seq.append(next_token_index)\n",
    "    \n",
    "    # 去掉开头的<bos>\n",
    "    pred_seq = tgt_vocab[pred_seq[1:]]\n",
    "    # 因为tgt_seq的形状为(1, seq_len)，我们需要将其转化成(seq_len, )的形状\n",
    "    tgt_seq = tgt_seq.squeeze().tolist()\n",
    "    \n",
    "    # 需要注意在<eos>之前截断\n",
    "    if tgt_vocab['<eos>'] in tgt_seq:\n",
    "        eos_idx = tgt_seq.index(tgt_vocab['<eos>'])\n",
    "        tgt_seq = tgt_vocab[tgt_seq[:eos_idx]]\n",
    "    else:\n",
    "        tgt_seq = tgt_vocab[tgt_seq]\n",
    "    translation_results.append((' '.join(tgt_seq), ' '.join(pred_seq)))\n",
    "    bleu_scores.append(bleu(tgt_seq, pred_seq, n=2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "270137b5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.16821586571116853\n"
     ]
    }
   ],
   "source": [
    "print(sum(bleu_scores) / test_size)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fd8fada1",
   "metadata": {},
   "source": [
    "### 模型效果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6bbd3aa5",
   "metadata": {},
   "outputs": [],
   "source": [
    "translation_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b36d3363",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
